def runcrawl(request):
    """Re-run a previously saved crawl and render its graph.

    Looks up the Crawl row referenced by the ``id`` GET parameter,
    rebuilds the crawl specification from its stored fields, and feeds
    it to ``initializeParams``.  On success renders ``run.html`` with
    node/edge counts, node/edge data, and the workflows extracted for
    the latest crawl; otherwise renders ``error.html``.
    """
    crawl_spec_dict = {}
    if request.method == 'GET':
        # .get() avoids an unhandled KeyError (HTTP 500) when 'id' is
        # absent; Crawl.objects.get still raises DoesNotExist for a bad id.
        id_val = request.GET.get('id')
        crawl = Crawl.objects.get(id=id_val)
        crawl_spec_dict = {
            "login_script": crawl.login_script,
            "login_url": crawl.login_url,
            "form_values_script": crawl.form_values_script,
            "base_address": crawl.base_address,
            "start_url": crawl.start_url,
            "black_list_urls": crawl.black_list_urls,
            "scope_urls": crawl.scope_urls,
            "wait_time": crawl.wait_time,
            "depth": crawl.depth,
        }
    # A non-GET request leaves the spec empty; initializeParams is
    # expected to return a falsy value for it, which routes to error.html.
    fsm = initializeParams(crawl_spec_dict)
    if fsm:
        number_of_nodes = fsm.graph.number_of_nodes()
        # Use the graph API consistently instead of len(fsm.graph.edges()).
        number_of_edges = fsm.graph.number_of_edges()
        nodes = getNodes(fsm.graph)
        edges = getEdges(fsm.graph)
        # NOTE(review): the crawl fetched by id above is discarded here in
        # favour of the newest row — presumably initializeParams creates a
        # fresh Crawl record; confirm before changing.
        crawl = Crawl.objects.latest("id")
        workflows = getWfs(crawl.id)
        return render(request, 'run.html',
                      {'num_nodes': number_of_nodes,
                       'num_edges': number_of_edges,
                       'nodes': nodes,
                       'edges': edges,
                       'workflows': workflows})
    else:
        return render(request, "error.html")
def crawlingController(request):
    """Handle the crawl-configuration form submission.

    On POST, builds a crawl specification from the form fields and the
    two uploaded script files, persists it as a Crawl row, and starts
    the crawl via ``initializeParams``.  Renders ``run.html`` with the
    resulting graph and workflow data, or ``error.html`` when the crawl
    could not be initialised (or the request was not a POST).
    """
    # Bug fix: fsm must exist for the final branch even when the
    # request is not a POST; previously it could raise NameError.
    fsm = None
    if request.method == 'POST':
        crawling_spec = {
            "login_script": request.FILES.get('login-script', None),
            "login_url": request.POST.get('login-url', ""),
            "form_values_script": request.FILES.get('form-values-script', None),
            "base_address": request.POST.get('base-address', ""),
            "start_url": request.POST.get('start-url', ""),
            "black_list_urls": request.POST.get('black-list-urls', ""),
            "scope_urls": request.POST.get('scope-urls', ""),
            "wait_time": request.POST.get('wait-time', ""),
            "depth": request.POST.get('depth', "100"),
        }

        def _read_stripped(upload):
            # Concatenate an uploaded file's lines with surrounding
            # whitespace removed.  Guards against a missing upload
            # (FILES.get returned None) instead of crashing on
            # .readlines(); str.join avoids quadratic += concatenation.
            if upload is None:
                return ""
            return "".join(line.strip() for line in upload.readlines())

        login_data = _read_stripped(crawling_spec['login_script'])
        form_data = _read_stripped(crawling_spec['form_values_script'])

        # Parse-only sanity pass over the form-values markup; the parsed
        # tree itself is not used further.
        BeautifulSoup(form_data)

        obj = Crawl(login_script=crawling_spec["login_script"],
                    login_url=crawling_spec["login_url"],
                    form_values_script=crawling_spec["form_values_script"],
                    base_address=crawling_spec["base_address"],
                    start_url=crawling_spec["start_url"],
                    black_list_urls=crawling_spec["black_list_urls"],
                    scope_urls=crawling_spec["scope_urls"],
                    wait_time=crawling_spec["wait_time"],
                    depth=crawling_spec["depth"])
        obj.save()

        # The crawler itself consumes the script *contents*, not the
        # uploaded file objects.
        crawling_spec["login_script"] = login_data
        crawling_spec["form_values_script"] = form_data
        fsm = initializeParams(crawling_spec)

    if fsm:
        returnJsonGraph(fsm.graph)
        number_of_nodes = fsm.graph.number_of_nodes()
        # Use the graph API consistently instead of len(fsm.graph.edges()).
        number_of_edges = fsm.graph.number_of_edges()
        nodes = getNodes(fsm.graph)
        edges = getEdges(fsm.graph)
        crawl = Crawl.objects.latest("id")
        pathSourcetoSink(fsm, crawl)
        workflows = getWfs(crawl.id)
        return render(request, 'run.html',
                      {'num_nodes': number_of_nodes,
                       'num_edges': number_of_edges,
                       'nodes': nodes,
                       'edges': edges,
                       'workflows': workflows})
    else:
        return render(request, "error.html")