Example #1
def runcrawl(request):
    """Load a stored Crawl by id, rebuild its crawl spec, and render the resulting graph."""
    print "request"
    crawl_spec_dict = {}
    #if request.is_ajax():
    if request.method == 'GET':
        #id_val = 1
        id_val = request.GET['id']
        crawl = Crawl.objects.get(id=id_val)
        print crawl
        crawl_spec_dict = {"login_script": crawl.login_script, "login_url": crawl.login_url,
                           "form_values_script": crawl.form_values_script, "base_address": crawl.base_address,
                           "start_url": crawl.start_url, "black_list_urls": crawl.black_list_urls,
                           "scope_urls": crawl.scope_urls, "wait_time": crawl.wait_time, "depth": crawl.depth}
                           #"proxy_address": crawl.proxy_address}
        print crawl_spec_dict
        fsm = initializeParams(crawl_spec_dict)  # build the crawl FSM (exposes a .graph) from the spec
        #print "graph object",fsm.graph.nodes()
        #pathSourcetoSink(fsm, crawl)
        if fsm:
            number_of_nodes = fsm.graph.number_of_nodes()
            number_of_edges = len(fsm.graph.edges())
            nodes = getNodes(fsm.graph)
            edges = getEdges(fsm.graph)
            crawl = Crawl.objects.latest("id")
            #pathSourcetoSink(fsm, crawl)
            print crawl.id
            workflows = getWfs(crawl.id)
            print "workflows"
            print workflows
            #print edges
            #print nodes
            return render(request, 'run.html', {'num_nodes': number_of_nodes, 'num_edges': number_of_edges, 'nodes': nodes, 'edges': edges, 'workflows': workflows})
        else:
            return render(request, "error.html")            
Example #2
def crawlingController(request):
    """Create and save a Crawl from a POSTed specification, run it, and render the resulting graph."""
    crawling_spec = {}
    if request.method == 'POST':
        print request.FILES
        crawling_spec["login_script"] = request.FILES.get('login-script', None)
        crawling_spec["login_url"] = request.POST.get('login-url', "")
        crawling_spec["form_values_script"] = request.FILES.get('form-values-script', None)
        crawling_spec["base_address"] = request.POST.get('base-address', "")
        crawling_spec["start_url"] = request.POST.get('start-url', "")
        crawling_spec["black_list_urls"] = request.POST.get('black-list-urls', "")
        crawling_spec["scope_urls"] = request.POST.get('scope-urls', "")
        crawling_spec["wait_time"] = request.POST.get('wait-time', "")
        crawling_spec["depth"] = request.POST.get('depth', "100")
        #crawling_spec["proxy_address"] = request.POST.get('proxy-address', "")
        #print crawling_spec
        login_data = ""
        form_data = ""
        # Flatten the uploaded login script into a single string (assumes the file was provided)
        lines = crawling_spec['login_script'].readlines()
        for line in lines:
            login_data = login_data + line.strip()
        # Flatten the uploaded form-values script the same way
        lines = crawling_spec['form_values_script'].readlines()
        for line in lines:
            form_data = form_data + line.strip()

        bs = BeautifulSoup(form_data)
        print bs
        #print bs.findAll("tr") 
        obj = Crawl(login_script=crawling_spec["login_script"], login_url=crawling_spec["login_url"],
                    form_values_script=crawling_spec["form_values_script"],
                    base_address=crawling_spec["base_address"], start_url=crawling_spec["start_url"],
                    black_list_urls=crawling_spec["black_list_urls"],
                    scope_urls=crawling_spec["scope_urls"],
                    wait_time=crawling_spec["wait_time"],
                    depth=crawling_spec["depth"])
                    #proxy_address=crawling_spec["proxy_address"])
        #print login_script, login_url, form_values_script, base_address, start_url, black_list_urls, scope_urls, wait_time
        obj.save()
        crawling_spec["login_script"] = login_data
        crawling_spec["form_values_script"] = form_data
        fsm = initializeParams(crawling_spec)
        #pathSourcetoSink(fsm, crawl, crawl.login_url)
        #print graph
        if fsm:
            returnJsonGraph(fsm.graph)
            number_of_nodes = fsm.graph.number_of_nodes()
            number_of_edges = len(fsm.graph.edges())
            nodes = getNodes(fsm.graph)
            edges = getEdges(fsm.graph)
            crawl = Crawl.objects.latest("id")
            pathSourcetoSink(fsm, crawl)    
            print crawl.id
            workflows = getWfs(crawl.id)
            print "workflows"
            print workflows
            #print edges
            #print nodes
            return render(request, 'run.html', {'num_nodes': number_of_nodes, 'num_edges': number_of_edges, 'nodes': nodes, 'edges': edges, 'workflows': workflows})
        else:
            return render(request, "error.html")