def crawl_view(request):
    """Render the crawl page (version 1).

    On POST, validates the crawl form and dispatches to the crawler
    selected by whichever submit button is present in ``form.data``
    (blog-site scraping, market-intelligence indexing, product crawl/index,
    or feedly retrieval), collecting failures as form errors.  On GET,
    shows a fresh form with default scrape choices.
    """
    if request.method == 'POST':
        form = crawl_form(request.POST)
        # called from crawh.html
        if form.is_valid():
            from_date = form.cleaned_data['from_date']
            nrpages = form.cleaned_data['nrpages_field']
            site_choices = form.cleaned_data['site_choices_field']
            scrape_choices = form.cleaned_data['scrape_choices_field']
            rss_field = form.cleaned_data['rss_field']
            product_field = form.cleaned_data['product_field']
            username = form.cleaned_data['username']
            password = form.cleaned_data['password']
            # Default crawl window: from January 1st of last year.
            if from_date is None:
                today = datetime.now()
                from_date = datetime(today.year - 1, 1, 1, 0, 0, 0)
            if 'crawl_si_sites' in form.data:
                for site_choice in site_choices:
                    if site_choice == 'apf':
                        crawl.crawl_apf(scrape_choices, nrpages)
                    elif site_choice == 'cosmetics':
                        crawl.crawl_cosmetic(scrape_choices, nrpages)
                    else:
                        crawl.si_site(site_choice, nrpages)
            elif 'crawl_mi' in form.data:
                if not market.index_posts(from_date, username, password):
                    form.add_form_error("Could not index category posts")
            if 'crawl_pi' in form.data:
                if product_field == '':
                    form.add_form_error("Specify a product")
                else:
                    # FIXME(review): `index_choices` is never defined in this
                    # view, so this branch raises NameError when exercised.
                    # It was probably meant to be `scrape_choices` -- confirm
                    # against product.crawl_product's signature and fix.
                    if not product.crawl_product(index_choices, product_field):
                        form.add_form_error("Could not save product data")
            if 'index_pi' in form.data:
                if product_field == '':
                    form.add_form_error("Specify a product")
                else:
                    # FIXME(review): same undefined `index_choices` as above.
                    if not product.index_product(index_choices, product_field):
                        form.add_form_error("Could not retrieve product data")
            if 'crawl_feedly' in form.data:
                if not crawl.crawl_feedly(from_date, rss_field):
                    form.add_form_error("Could not retrieve feedly data, expired")
            if 'return_survey' in form.data:
                pass  # deliberate no-op: survey return is handled elsewhere
            return render(request, 'app/crawl.html',
                          {'form': form,
                           'es_hosts': FMI.settings.ES_HOSTS,
                           'scrape_li': models.scrape_li})
    else:
        form = crawl_form(initial={'scrape_choices_field': ['product', 'blog'],
                                   'excel_choices_field': ['recreate']})
    # Reached on GET, and on an invalid POST (so the page re-renders with
    # the form's validation errors instead of returning None).
    return render(request, 'app/crawl.html',
                  {'form': form,
                   'es_hosts': FMI.settings.ES_HOSTS,
                   'message': 'IFF - Insight Platform',
                   'year': datetime.now().year})
def crawl_view(request):
    """Render the crawl page (version 2).

    On POST, validates the crawl form and dispatches to the crawler
    selected by the submit button present in ``form.data``: blog-site
    scraping, feedly/market-intelligence indexing, or per-site product
    intelligence crawl/retrieve (fragrantica / amazon / basenotes).
    Failures are reported as form errors.  On GET, shows a fresh form;
    the ``sections`` flags control which page sections the template shows.
    """
    sections = {}
    if request.method == 'POST':
        form = crawl_form(request.POST)
        # called from crawh.html
        if form.is_valid():
            from_dt = form.cleaned_data['from_dt']
            nrpages = form.cleaned_data['nrpages_field']
            site_choices = form.cleaned_data['site_choices_field']
            scrape_choices = form.cleaned_data['scrape_choices_field']
            rss_field = form.cleaned_data['rss_field']
            brand_name = form.cleaned_data['brand_name_field']
            brand_variant = form.cleaned_data['brand_variant_field']
            perfume_code = form.cleaned_data['perfume_code_field'].lower()
            username = form.cleaned_data['username']
            password = form.cleaned_data['password']
            # Default crawl window: from January 1st of last year.
            if from_dt is None:
                today = datetime.now()
                from_dt = datetime(today.year - 1, 1, 1).date()
            # Crawl BLOG Sites
            if 'crawl_si_sites' in form.data:
                for site_choice in site_choices:
                    if site_choice == 'apf':
                        crawl.crawl_apf(from_dt, scrape_choices, nrpages)
                    elif site_choice == 'cosmetics':
                        crawl.crawl_cosmetic(from_dt, scrape_choices, nrpages)
                    else:
                        crawl.crawl_si_site(from_dt, site_choice, nrpages)
            # Crawl Market Intelligence Sites
            if 'crawl_feedly' in form.data:
                if not crawl.crawl_feedly(from_dt, rss_field):
                    form.add_form_error("Could not retrieve feedly data, expired")
            elif 'crawl_mi' in form.data:
                if not market.index_posts(from_dt, username, password):
                    form.add_form_error("Could not index category posts")
            # Crawl Product Intelligence Sites
            if 'crawl_pi' in form.data and 'pi_site_choices_field' in request.POST:
                pi_site_choices = request.POST['pi_site_choices_field']
                if perfume_code == '':
                    form.add_form_error("Specify a product code")
                else:
                    # Default to failure so an unrecognized site choice reports
                    # an error instead of raising UnboundLocalError on `success`.
                    success = False
                    if pi_site_choices == 'fragrantica':
                        success = product.crawl_fragrantica(
                            from_dt, brand_name, brand_variant, perfume_code)
                    elif pi_site_choices == 'amazon':
                        success = product.crawl_amazon(
                            from_dt, brand_name, brand_variant, perfume_code)
                    elif pi_site_choices == 'basenotes':
                        success = product.crawl_basenotes(
                            from_dt, brand_name, brand_variant, perfume_code)
                    if not success:
                        form.add_form_error("Could not save product data")
            if 'retrieve_pi' in form.data and 'pi_site_choices_field' in request.POST:
                pi_site_choices = request.POST['pi_site_choices_field']
                if perfume_code == '':
                    form.add_form_error("Specify a product code")
                else:
                    # Same defaulting as above; also fixes the original's
                    # unbound `success` when no site choice matched.
                    success = False
                    if pi_site_choices == 'fragrantica':
                        success = product.retrieve_fragrantica(perfume_code)
                    elif pi_site_choices == 'amazon':
                        success = product.retrieve_amazon(perfume_code)
                    elif pi_site_choices == 'basenotes':
                        success = product.retrieve_basenotes(perfume_code)
                    if not success:
                        form.add_form_error("Could not save product data")
            if 'return_survey' in form.data:
                pass  # deliberate no-op: survey return is handled elsewhere
            return render(request, 'app/crawl.html',
                          {'site': FMI.settings.site,
                           'form': form,
                           'es_hosts': FMI.settings.ES_HOSTS,
                           'sections': sections,
                           'scrape_li': models.scrape_li})
    else:
        form = crawl_form(initial={'scrape_choices_field': ['product', 'blog'],
                                   'excel_choices_field': ['recreate']})
        # ?crawl_pi in the query string narrows the page to the PI section.
        if 'crawl_pi' in request.GET:
            sections['crawl_mi'] = 0
            sections['crawl_feedly'] = 0
            sections['crawl_si_sites'] = 0
            sections['crawl_pi'] = 1
        else:
            sections['crawl_mi'] = 1
            sections['crawl_feedly'] = 1
            sections['crawl_si_sites'] = 1
            sections['crawl_pi'] = 1
    # Reached on GET, and on an invalid POST (so validation errors render).
    return render(request, 'app/crawl.html',
                  {'site': FMI.settings.site,
                   'form': form,
                   'es_hosts': FMI.settings.ES_HOSTS,
                   'sections': sections,
                   'year': datetime.now().year})
def crawl_view(request):
    """Render the crawl page (version 3).

    On POST, dispatches to the crawler selected by the submit button in
    ``form.data``: survey crawl/mapping, blog-site scraping, market
    intelligence, excel import, product crawl/index, feedly retrieval, or
    scentemotion import.  ``map_survey`` renders the crawlresults page;
    all other paths re-render the crawl page.  On GET, shows a fresh form.
    """
    if request.method == 'POST':
        form = crawl_form(request.POST)
        valid = form.is_valid()
        # Use .get(): cleaned_data only contains fields that passed
        # validation, so bracket access here could raise KeyError on an
        # invalid form (crawl_survey runs regardless of overall validity).
        ci_filename = form.cleaned_data.get('ci_filename_field')
        cimap_filename = form.cleaned_data.get('cimap_filename_field')
        # called form crawlresults.html
        if 'crawl_survey' in form.data:
            crawl.crawl_survey(ci_filename, cimap_filename)
        # called from crawh.html
        if valid:
            from_date = form.cleaned_data['from_date']
            nrpages = form.cleaned_data['nrpages_field']
            site_choices = form.cleaned_data['site_choices_field']
            scrape_choices = form.cleaned_data['scrape_choices_field']
            rss_field = form.cleaned_data['rss_field']
            product_field = form.cleaned_data['product_field']
            username = form.cleaned_data['username']
            password = form.cleaned_data['password']
            cft_filename = form.cleaned_data['cft_filename_field']
            excel_choices = form.cleaned_data['excel_choices_field']
            excel_filename = form.cleaned_data['excel_filename_field']
            ci_filename = form.cleaned_data['ci_filename_field']
            cimap_filename = form.cleaned_data['cimap_filename_field']
            # Default crawl window: from January 1st of last year.
            if from_date is None:
                today = datetime.now()
                from_date = datetime(today.year - 1, 1, 1, 0, 0, 0)
            if 'crawl_si_sites' in form.data:
                for site_choice in site_choices:
                    if site_choice == 'apf':
                        crawl.crawl_apf(scrape_choices, nrpages)
                    elif site_choice == 'cosmetics':
                        crawl.crawl_cosmetic(scrape_choices, nrpages)
                    else:
                        crawl.si_site(site_choice, nrpages)
            elif 'crawl_mi' in form.data:
                if not market.index_posts(from_date, username, password):
                    form.add_form_error("Could not index category posts")
            if 'crawl_excel' in form.data:
                if not crawl.crawl_excel(excel_filename, excel_choices):
                    form.add_form_error(
                        "Could not retrieve or index excel file")
            if 'crawl_pi' in form.data:
                if product_field == '':
                    form.add_form_error("Specify a product")
                else:
                    # FIXME(review): `index_choices` is never defined in this
                    # view, so this branch raises NameError when exercised.
                    # Probably meant `scrape_choices` -- confirm and fix.
                    if not product.crawl_product(index_choices, product_field):
                        form.add_form_error("Could not save product data")
            if 'index_pi' in form.data:
                if product_field == '':
                    form.add_form_error("Specify a product")
                else:
                    # FIXME(review): same undefined `index_choices` as above.
                    if not product.index_product(index_choices, product_field):
                        form.add_form_error("Could not retrieve product data")
            if 'crawl_feedly' in form.data:
                if not crawl.crawl_feedly(from_date, rss_field):
                    form.add_form_error(
                        "Could not retrieve feedly data, expired")
            if 'crawl_scentemotion' in form.data:
                crawl.crawl_scentemotion(cft_filename)
            if 'map_survey' in form.data:
                col_map = crawl.map_survey(ci_filename, cimap_filename)
                # Flatten all answer keys of every survey question; built here
                # (not unconditionally) because only this branch uses them.
                answers = []
                for question in survey.qa.keys():
                    answers.extend(list(survey.qa[question].keys()))
                answers = sorted(answers)
                context = {
                    'form': form,
                    'col_map': col_map,
                    'answers': answers,
                    'qa': survey.qa,
                }
                return render(request, 'app/crawlresults.html', context)
            if 'return_survey' in form.data:
                pass  # deliberate no-op: survey return is handled elsewhere
            return render(request, 'app/crawl.html',
                          {'form': form,
                           'es_hosts': FMI.settings.ES_HOSTS,
                           'scrape_li': models.scrape_li})
    else:
        form = crawl_form(initial={'scrape_choices_field': ['product', 'blog'],
                                   'excel_choices_field': ['recreate']})
    # Reached on GET, and on an invalid POST.  The original passed
    # context_instance=RequestContext(...), which was removed from render()
    # in Django 1.10; the extra values are merged into one context dict.
    return render(request, 'app/crawl.html',
                  {'form': form,
                   'es_hosts': FMI.settings.ES_HOSTS,
                   'message': 'IFF - Insight Platform',
                   'year': datetime.now().year})