Code example #1 (file: mrserver.py, project: SriganeshNk/VCrawlBackEnd)
def check_vulnerabilities():
    """Crawl a user-supplied URL, run a Hadoop MapReduce vulnerability scan
    over the discovered URLs, and return the aggregated results as JSON.

    Reads from the Flask request form:
        url   -- the starting URL to crawl
        pages -- how many pages to crawl (string; converted to int)

    Returns:
        A Flask JSON response of the form {"output": [...]}.
    """
    path = request.form.get("url")
    page = request.form.get("pages")
    domain = fc.findDomain(path)
    path = fc.getCorrectURL(path)
    urls = fc.crawl(path, domain, int(page))

    # Context manager guarantees the file is closed even if a write raises.
    with open("./urllist.txt", "w") as data:
        for url in urls:
            data.write("%s\n" % url)

    # Cleans the HDFS paths and loads the urllist to HDFS.
    subprocess.call("./mrstartup.sh", shell=True)

    # Starts the MR job for the urls in the urllist.
    subprocess.call("./mrlauncher.sh --input=/user/smullassery/syssec/urllist.txt --output=/user/smullassery/syssec/output", shell=True)

    # Reads the result from the MR job. Popen with stdout=PIPE yields bytes
    # on Python 3, so decode before using str methods (the original
    # `bytes.split('\t\n')` raises TypeError under Python 3).
    result = subprocess.Popen(
        ["hdfs", "dfs", "-cat", "/user/smullassery/syssec/output/*"],
        stdout=subprocess.PIPE,
    )
    result_string, error = result.communicate()
    if isinstance(result_string, bytes):
        result_string = result_string.decode("utf-8")
    results = result_string.split('\t\n')
    print("Map Reduce job completed")

    # Each non-empty chunk is a Python-literal list emitted by the MR job;
    # ast.literal_eval only parses literals and never executes code.
    final_result = []
    for temp in results:
        if temp:
            list1 = ast.literal_eval(temp)
            if list1:
                final_result.extend(list1)

    return jsonify(output=final_result)
Code example #2
def create_crawl():
    """Crawl a user-supplied URL and analyse the discovered pages.

    Reads from the Flask request form:
        url   -- the starting URL to crawl
        pages -- maximum number of pages to crawl (string; converted to int)

    Returns:
        A Flask JSON response {"output": [...]}. Aborts with 404 when the
        URL cannot be normalised; returns HTTP 500 when the analysis
        produces no output.
    """
    path = request.form.get("url")
    page = request.form.get("pages")
    path = fc.getCorrectURL(path)
    if path is None:  # identity check for None (PEP 8), not `== None`
        abort(404)
    # Parse the page count once instead of re-converting the form value
    # on every use.
    limit = int(page)
    domain = fc.findDomain(path)
    urls = fc.crawl(path, domain, limit)
    # Truncate defensively in case the crawler returned more than asked for.
    if len(urls) > limit:
        urls = urls[:limit]
    out = fc.analyseMain(urls)
    if not out:
        # An empty analysis result is treated as a server-side failure.
        response = jsonify(output=out)
        response.status_code = 500
        return response
    return jsonify(output=out)