def request_analysis_batch():
    """Flask view: run a batch URL analysis from a JSON POST body.

    Expects JSON with 'url' (list of URLs) and 'api_key'.  Optional keys:
    'file' (an XML document whose URLs are appended to the list), 'mode'
    ('domain' / 'subdomain' / 'default') and 'ignore' (a list forwarded to
    the analyser as ignore_errors when non-empty).

    Returns the analyser result with HTTP 200, 400 on a non-JSON body or
    invalid parameters, and 405 for non-POST methods.
    """
    # Reject non-JSON bodies up front.
    if not check_json(request):
        return make_response(
            jsonify({"message": "Request body must be JSON."}), 400)
    if request.method != 'POST':
        return Response(status=405)
    json_data = request.get_json()
    if 'url' not in json_data or 'api_key' not in json_data:
        return make_response(jsonify({"message": "Invalid parameters."}), 400)
    url = json_data['url']
    api_key = json_data['api_key']
    # Optional XML upload: harvest its URLs and append them to the url list.
    # Clients uploading an XML must still send 'url' (e.g. an empty array),
    # since the extracted URLs are extend()-ed onto it.
    if 'file' in json_data:
        url.extend(process_xml(json_data['file']))
    # Collapse the mode/ignore combinations into one kwargs dict instead of
    # the previous copy-pasted if/elif ladder; each call used the same
    # positional arguments and only varied dup_mode / ignore_errors.
    kwargs = {}
    if 'mode' in json_data:
        mode = json_data['mode']
        if mode in ('domain', 'subdomain'):
            kwargs['dup_mode'] = mode
        elif mode != 'default':
            # Unknown mode value.
            return make_response(
                jsonify({"message": "Invalid parameters."}), 400)
        # Forward the ignore list only when present and non-empty.
        if json_data.get('ignore'):
            kwargs['ignore_errors'] = json_data['ignore']
    result = Analyser().request_analysis(url, api_key, "batch", **kwargs)
    return make_response(result, 200)
def request_analysis():
    """Flask view: run a single-URL analysis from a JSON POST body.

    Expects JSON with 'url' and 'api_key'.  Optional keys: 'mode'
    ('domain' / 'subdomain' / 'default') and 'ignore' (a list forwarded to
    the analyser as ignore_errors when non-empty).

    Returns the analyser result with HTTP 200, 400 on a non-JSON body or
    invalid parameters, and 405 for non-POST methods.
    """
    # Reject non-JSON bodies up front.
    if not check_json(request):
        return make_response(
            jsonify({"message": "Request body must be JSON."}), 400)
    if request.method != 'POST':
        return Response(status=405)
    json_data = request.get_json()
    if 'url' not in json_data or 'api_key' not in json_data:
        return make_response(jsonify({"message": "Invalid parameters."}), 400)
    url = json_data['url']
    api_key = json_data['api_key']
    # Collapse the mode/ignore combinations into one kwargs dict instead of
    # the previous copy-pasted if/elif ladder; each call used the same
    # positional arguments and only varied dup_mode / ignore_errors.
    kwargs = {}
    if 'mode' in json_data:
        mode = json_data['mode']
        if mode in ('domain', 'subdomain'):
            kwargs['dup_mode'] = mode
        elif mode != 'default':
            # Unknown mode value; the original echoes the bad mode back.
            return make_response(
                jsonify({
                    "message": "Invalid parameters." + json_data['mode']
                }), 400)
        # Forward the ignore list only when present and non-empty.
        if json_data.get('ignore'):
            kwargs['ignore_errors'] = json_data['ignore']
    result = Analyser().request_analysis(url, api_key, "single", **kwargs)
    return make_response(result, 200)
def negAnalysis(args):
    """Plot the average mean inner product of entity embeddings against the
    number of negative samples, for several KG-embedding methods.

    For each dimensionality in ``dims``: when ``args.result`` is falsy, load
    every trained model, compute inner-product statistics over
    frequency-banded entity samples, plot them with ``plotBars`` and pickle
    the results; when truthy, reload the pickle and only redraw the plot.

    NOTE(review): a second ``negAnalysis`` is defined later in this file and
    shadows this one at import time.
    """
    #self.cfg = ConfigParser()
    #self.cfg.read(args.cfgFile)
    # Embedding methods and the (#negatives, dimension) grid to analyse.
    methods = ['transe', 'transr', 'stranse', 'distmult', 'hole', 'complex']
    nnegs = [1, 50, 100]
    dims = [50, 100]
    for dim in dims:
        if not args.result:
            # mean_products[nneg][method] -> per-band mean products;
            # mean_products_list is the same data flattened for plotBars
            # (one row per nneg, one column per method).
            mean_products = {}
            mean_products_list = []
            for nneg in nnegs:
                cur_mean_products_list = []
                for method in methods:
                    # Files follow "<dataname>.<method>.n<nneg>.d<dim>.p"
                    # (model) and "<dataname>.<method>.bin" (data).
                    modelfile = "%s.%s.n%d.d%d.p" % (args.dataname, method,
                                                     nneg, dim)
                    modelfile = os.path.join(args.mdir, modelfile)
                    datafile = "%s.%s.bin" % (args.dataname, method)
                    datafile = os.path.join(args.mdir, datafile)
                    analyser = Analyser(datafile, modelfile, usePR=False)
                    # Sample 100 entities from each frequency-rank band.
                    nSamples = 100
                    eRanges = [((0, 100), nSamples), ((100, 500), nSamples),
                               ((500, 5000), nSamples),
                               ((5000, analyser.t.ne), nSamples)]
                    entIndices = analyser.getEntIdxs(eRanges)
                    # Band labels; built but unused here (plotBars uses
                    # method names as legends).
                    legendLabels = []
                    for a, b in eRanges:
                        curLabel = "%d-%d" % (a[0], a[1])
                        legendLabels.append(curLabel)
                    gp, mgp = analyser.getInnerProducts(entIndices,
                                                        sampleMean=False,
                                                        ent=True,
                                                        normalized=True)
                    print "%s\tneg %d" % (method, nneg)
                    print mgp
                    mean_products.setdefault(nneg, {})[method] = np.array(
                        mgp, dtype=np.float32)
                    # The last entry of mgp is the single value plotted.
                    cur_mean_products_list.append(np.float32(mgp[-1]))
                mean_products_list.append(cur_mean_products_list)
            outputfile = os.path.join(args.opdir,
                                      "%s.d%d" % (args.dataname, dim))
            plotBars(mean_products_list, xlabel="#negatives",
                     ylabel="Avg MeanProduct", legends=methods, xticks=nnegs,
                     outfile=outputfile, show=False)
            # Persist everything needed to redraw later (args.result mode).
            with open(outputfile + ".p", "wb") as fout:
                pickle.dump(
                    {
                        "mean_products": mean_products,
                        "mean_products_list": mean_products_list,
                        "methods": methods,
                        "nnegs": nnegs,
                        "dim": dim
                    }, fout)
        else:
            # Result-only mode: reload the pickle and redraw the bar plot.
            outputfile = os.path.join(args.opdir,
                                      "%s.d%d" % (args.dataname, dim))
            with open(outputfile + ".p", "rb") as fin:
                """
                mean_products = pickle.load(fin)
                mean_products_list = []
                for nneg in nnegs:
                    cur_products_list = []
                    for method in methods:
                        cur_products_list.append(np.float32(mean_products[nneg][method][-1]))
                    mean_products_list.append(cur_products_list)
                """
                result = pickle.load(fin)
                mean_products_list = result['mean_products_list']
            plotBars(mean_products_list, xlabel="#negatives",
                     ylabel="Avg MeanProduct", legends=methods, xticks=nnegs,
                     outfile=outputfile, show=False)
def negAnalysis(args):
    """Plot an embedding-geometry statistic (vector length or conicity)
    against the number of negative samples, for several KG-embedding methods.

    Extended variant of the earlier ``negAnalysis`` in this file (this later
    definition is the one in effect at import time).  Differences: tolerates
    missing model files (fills zeros), supports entity or relation analysis
    via ``args.type``, and selects the statistic via ``args.geometry``
    ('length' -> getLengths, otherwise inner products / conicity).

    When ``args.result`` is falsy the statistics are computed and pickled;
    when truthy a previous pickle is reloaded and only the plot is redrawn.
    """
    #self.cfg = ConfigParser()
    #self.cfg.read(args.cfgFile)
    # Embedding methods and the (#negatives, dimension) grid to analyse.
    methods = ['transe', 'transr', 'stranse', 'distmult', 'hole', 'complex']
    nnegs = [1, 50, 100]
    dims = [50, 100]
    useEnt = True
    for dim in dims:
        if not args.result:
            # mean_products[nneg][method] -> per-band statistic vector;
            # mean_products_list is the same data flattened for plotting.
            mean_products = {}
            mean_products_list = []
            for nneg in nnegs:
                cur_mean_products_list = []
                for method in methods:
                    modelfile = "%s.%s.n%d.d%d.p" % (args.dataname, method,
                                                     nneg, dim)
                    modelfile = os.path.join(args.mdir, modelfile)
                    if not os.path.exists(modelfile):
                        # Missing model: record a zero placeholder so the
                        # grid stays rectangular, and move on.
                        print modelfile
                        if args.type in ['ent']:
                            # NOTE(review): nBins is assumed to match the
                            # length of mgp computed below (5 for entities,
                            # 4 for relations) — confirm against
                            # getInnerProducts/getLengths.
                            nBins = 5
                        else:
                            nBins = 4
                        mean_products.setdefault(nneg, {})[method] = np.array(
                            np.zeros(nBins, ), dtype=np.float32)
                        cur_mean_products_list.append(np.float32(0.0))
                        continue
                    datafile = "%s.%s.bin" % (args.dataname, method)
                    datafile = os.path.join(args.mdir, datafile)
                    analyser = Analyser(datafile, modelfile, usePR=False)
                    #nSamples = 100
                    #eRanges = [((0,100), nSamples), ((100,500), nSamples), ((500,5000), nSamples), ((5000, analyser.t.ne), nSamples)]
                    #entIndices = analyser.getEntIdxs(eRanges)
                    if args.type in ['ent']:
                        # Entity analysis: sample 100 entities per
                        # frequency-rank band.
                        nSamples = 100
                        ranges = [((0, 100), nSamples), ((100, 500), nSamples),
                                  ((500, 5000), nSamples),
                                  ((5000, analyser.t.ne), nSamples)]
                        indices = analyser.getEntIdxs(ranges)
                        useEnt = True
                    else:
                        # Relation analysis; wn18 has few relations, so it
                        # gets its own (smaller) bands.
                        nSamples = 100
                        if args.dataname in ['wn18']:
                            ranges = [((0, 3), 3), ((3, 10), 7),
                                      ((10, analyser.t.nr),
                                       analyser.t.nr - 10)]
                        else:
                            ranges = [((0, 100), nSamples),
                                      ((100, 500), nSamples),
                                      ((500, analyser.t.nr), nSamples)]
                        indices = analyser.getRelIdxs(ranges)
                        useEnt = False
                    # Band labels; built but unused here.
                    legendLabels = []
                    for a, b in ranges:
                        curLabel = "%d-%d" % (a[0], a[1])
                        legendLabels.append(curLabel)
                    # Geometry statistic: vector lengths or (normalized)
                    # inner products.
                    if args.geometry in ['length']:
                        gp, mgp = analyser.getLengths(indices, ent=useEnt)
                    else:
                        gp, mgp = analyser.getInnerProducts(indices,
                                                            sampleMean=True,
                                                            ent=useEnt,
                                                            normalized=True)
                    print "%s\tneg %d" % (method, nneg)
                    print mgp
                    mean_products.setdefault(nneg, {})[method] = np.array(
                        mgp, dtype=np.float32)
                    # The last entry of mgp is the single value plotted.
                    cur_mean_products_list.append(np.float32(mgp[-1]))
                mean_products_list.append(cur_mean_products_list)
            outputfile = os.path.join(
                args.opdir, args.geometry,
                "%s.%s.d%d" % (args.type, args.dataname, dim))
            #plotBars(mean_products_list, xlabel="#negatives", ylabel="Avg MeanProduct", legends=methods, xticks=nnegs, outfile=outputfile, show=False)
            # Persist everything needed to redraw later (args.result mode).
            with open(outputfile + ".p", "wb") as fout:
                pickle.dump(
                    {
                        "mean_products": mean_products,
                        "mean_products_list": mean_products_list,
                        "methods": methods,
                        "nnegs": nnegs,
                        "dim": dim
                    }, fout)
        else:
            # Result-only mode: reload the pickle and redraw the bar plot.
            outputfile = os.path.join(
                args.opdir, args.geometry,
                "%s.%s.d%d" % (args.type, args.dataname, dim))
            with open(outputfile + ".p", "rb") as fin:
                """
                mean_products = pickle.load(fin)
                mean_products_list = []
                for nneg in nnegs:
                    cur_products_list = []
                    for method in methods:
                        cur_products_list.append(np.float32(mean_products[nneg][method][-1]))
                    mean_products_list.append(cur_products_list)
                """
                result = pickle.load(fin)
                mean_products_list = result['mean_products_list']
            if args.geometry in ['length']:
                ylabel = 'length'
            else:
                ylabel = 'conicity'
            plotBars(mean_products_list, xlabel="#NegativeSamples",
                     ylabel=ylabel, legends=methods, xticks=nnegs,
                     outfile=outputfile, show=False)
def typeAnalysis(args):
    """Plot the distribution of a geometry statistic (length or normalized
    inner product) per method, using each method's pre-selected
    hyperparameters.

    The (nneg, dim) setting per method comes from the module-level
    ``best_methods`` or ``uniform_methods`` mapping, selected by
    ``args.best`` (which also redirects output into an extra "best"
    subdirectory).  When ``args.result`` is falsy the per-band statistics
    are computed and pickled; when truthy the pickle is reloaded and one
    distribution plot per method is drawn with ``plotDistribution``.
    """
    #self.cfg = ConfigParser()
    #self.cfg.read(args.cfgFile)
    #methods = ['transe', 'transr', 'stranse', 'distmult', 'hole', 'complex']
    #nnegs = [1, 50, 100]
    #dims = [50, 100]
    if args.best:
        methods = best_methods
        # NOTE(review): mutates args.opdir in place — later uses of args see
        # the "best" subdirectory appended.
        args.opdir = os.path.join(args.opdir, "best")
    else:
        methods = uniform_methods
    useEnt = True
    # Plot marker cycle; defined here but not used in this function.
    markers = "+.x3ov^<>p"
    if not args.result:
        # mean_products[method] -> {"nneg", "dim", "gp": per-band stats}.
        mean_products = {}
        for method, vals in methods[args.dataname].iteritems():
            # Hyperparameters chosen for this method on this dataset.
            nneg = vals['nneg']
            dim = vals['dim']
            modelfile = "%s.%s.n%d.d%d.p" % (args.dataname, method, nneg, dim)
            modelfile = os.path.join(args.mdir, modelfile)
            datafile = "%s.%s.bin" % (args.dataname, method)
            datafile = os.path.join(args.mdir, datafile)
            analyser = Analyser(datafile, modelfile, usePR=False)
            if args.type in ['ent']:
                # Entity analysis: sample 100 entities per frequency band.
                nSamples = 100
                ranges = [((0, 100), nSamples), ((100, 500), nSamples),
                          ((500, 5000), nSamples),
                          ((5000, analyser.t.ne), nSamples)]
                indices = analyser.getEntIdxs(ranges)
                useEnt = True
            else:
                # Relation analysis; wn18 has few relations, so smaller bands.
                nSamples = 100
                if args.dataname in ['wn18']:
                    ranges = [((0, 3), 3), ((3, 10), 3),
                              ((10, analyser.t.nr), 3)]
                else:
                    ranges = [((0, 100), nSamples), ((100, 500), nSamples),
                              ((500, analyser.t.nr), nSamples)]
                indices = analyser.getRelIdxs(ranges)
                useEnt = False
            # Band labels, pickled for the plotting pass below.
            legendLabels = []
            for a, b in ranges:
                curLabel = "%d-%d" % (a[0], a[1])
                legendLabels.append(curLabel)
            if args.geometry in ['length']:
                gp, mgp = analyser.getLengths(indices, ent=useEnt)
            else:
                gp, mgp = analyser.getInnerProducts(indices, sampleMean=True,
                                                    ent=useEnt,
                                                    normalized=True)
            print "%s\tneg %d" % (method, nneg)
            print mgp
            # Keep the full per-band data (gp), not just the means.
            mean_products[method] = {"nneg": nneg, "dim": dim,
                                     "gp": np.array(gp, dtype=np.float32)}
            #mean_products.setdefault(method, {}).setdefault(nneg, {})[dim] = np.array(gp, dtype=np.float32)
        outputfile = os.path.join(args.opdir, args.geometry,
                                  "%s.%s" % (args.type, args.dataname))
        # NOTE(review): legendLabels here is whatever the last loop
        # iteration produced — assumes all methods share the same ranges.
        with open(outputfile + ".p", "wb") as fout:
            pickle.dump({"mean_products": mean_products,
                         "methods": methods[args.dataname],
                         "legendLabels": legendLabels}, fout)
    else:
        # Result-only mode: reload the pickle and draw per-method plots.
        outputfile = os.path.join(args.opdir, args.geometry,
                                  "%s.%s" % (args.type, args.dataname))
        with open(outputfile + ".p", "rb") as fin:
            """
            mean_products = pickle.load(fin)
            mean_products_list = []
            for nneg in nnegs:
                cur_products_list = []
                for method in methods:
                    cur_products_list.append(np.float32(mean_products[nneg][method][-1]))
                mean_products_list.append(cur_products_list)
            """
            result = pickle.load(fin)
        mean_products = result['mean_products']
        legendLabels = result['legendLabels']
        # Rewrite the open-ended last band "X-Y" as "above X".
        x0 = legendLabels[-1].split("-")[0]
        legendLabels[-1] = "above %s" % x0
        for method, vals in mean_products.iteritems():
            if args.combined:
                outputfile = os.path.join(
                    args.opdir, "combined", args.geometry,
                    "%s.%s.%s.n%d.d%d" % (args.type, args.dataname, method,
                                          vals['nneg'], vals['dim']))
            else:
                outputfile = os.path.join(
                    args.opdir, args.geometry,
                    "%s.%s.%s.n%d.d%d" % (args.type, args.dataname, method,
                                          vals['nneg'], vals['dim']))
            if args.geometry in ['length']:
                xlabel = 'length'
            else:
                # 'atm' — presumably "average (inner product) to mean";
                # TODO confirm the intended expansion.
                xlabel = 'atm'
            plotDistribution(vals['gp'], xlabel=xlabel, ylabel="Density",
                             legends=legendLabels, modelName=method,
                             outfile=outputfile, show=False,
                             combined=args.combined)
def perfAnalysis(args):
    """Relate embedding geometry (length or conicity) to model performance.

    When ``args.result`` is falsy: compute the geometry statistic for every
    (method, nneg, dim) combination and pickle it (keyed dim -> nneg ->
    method), together with a flat "<dataname>.<method>.n<n>.d<d>" ->
    last-band-value mapping.  When truthy: reload that pickle, merge in the
    performance numbers from ``args.perffile`` if absent, build a whitelist
    of configurations to show, and draw the geometry-vs-performance plot
    with ``plotConePerf``.
    """
    #self.cfg = ConfigParser()
    #self.cfg.read(args.cfgFile)
    # Embedding methods and the (#negatives, dimension) grid to analyse.
    methods = ['transe', 'transr', 'stranse', 'distmult', 'hole', 'complex']
    nnegs = [1, 50, 100]
    dims = [50, 100]
    mean_products = {}
    # name_conicity["<dataname>.<method>.n<n>.d<d>"] -> last-band statistic.
    name_conicity = {}
    useEnt = True
    if not args.result:
        for dim in dims:
            for nneg in nnegs:
                for method in methods:
                    modelfile = "%s.%s.n%d.d%d.p" % (args.dataname, method,
                                                     nneg, dim)
                    modelfile = os.path.join(args.mdir, modelfile)
                    datafile = "%s.%s.bin" % (args.dataname, method)
                    datafile = os.path.join(args.mdir, datafile)
                    # NOTE(review): unlike negAnalysis, there is no
                    # os.path.exists guard — a missing model file will raise.
                    analyser = Analyser(datafile, modelfile, usePR=False)
                    #nSamples = 100
                    #eRanges = [((0,100), nSamples), ((100,500), nSamples), ((500,5000), nSamples), ((5000, analyser.t.ne), nSamples)]
                    #entIndices = analyser.getEntIdxs(eRanges)
                    if args.type in ['ent']:
                        # Entity analysis: 100 samples per frequency band.
                        nSamples = 100
                        ranges = [((0, 100), nSamples),
                                  ((100, 500), nSamples),
                                  ((500, 5000), nSamples),
                                  ((5000, analyser.t.ne), nSamples)]
                        indices = analyser.getEntIdxs(ranges)
                        useEnt = True
                    else:
                        # Relation analysis; wn18 gets smaller custom bands.
                        nSamples = 100
                        if args.dataname in ['wn18']:
                            ranges = [((0, 3), 3), ((3, 10), 7),
                                      ((10, analyser.t.nr),
                                       analyser.t.nr - 10)]
                        else:
                            ranges = [((0, 100), nSamples),
                                      ((100, 500), nSamples),
                                      ((500, analyser.t.nr), nSamples)]
                        indices = analyser.getRelIdxs(ranges)
                        useEnt = False
                    # Band labels; built but unused here.
                    legendLabels = []
                    for a, b in ranges:
                        curLabel = "%d-%d" % (a[0], a[1])
                        legendLabels.append(curLabel)
                    # Geometry statistic: lengths or normalized inner
                    # products (conicity).
                    if args.geometry in ['length']:
                        gp, mgp = analyser.getLengths(indices, ent=useEnt)
                    else:
                        gp, mgp = analyser.getInnerProducts(indices,
                                                            sampleMean=True,
                                                            ent=useEnt,
                                                            normalized=True)
                    print "%s\tneg %d" % (method, nneg)
                    print mgp
                    mean_products.setdefault(dim, {}).setdefault(
                        nneg, {})[method] = np.array(mgp, dtype=np.float32)
                    mname = "%s.%s.n%d.d%d" % (args.dataname, method, nneg,
                                               dim)
                    # Last band's value summarises this configuration.
                    name_conicity[mname] = mgp[-1]
        outputfile = os.path.join(args.opdir, args.geometry,
                                  "%s.%s" % (args.type, args.dataname))
        with open(outputfile + ".p", "wb") as fout:
            pickle.dump(
                {
                    "mean_products": mean_products,
                    "methods": methods,
                    "nnegs": nnegs,
                    "dims": dims,
                    "name_conicity": name_conicity
                }, fout)
            #pickle.dump({"mean_products":mean_products, "mean_products_list":mean_products_list, "methods":methods, "nnegs":nnegs, "dim":dim}, fout)
    else:
        # Result-only mode: reload the pickle and plot geometry vs perf.
        outputfile = os.path.join(args.opdir, args.geometry,
                                  "%s.%s" % (args.type, args.dataname))
        with open(outputfile + ".p", "rb") as fin:
            result = pickle.load(fin)
        # Merge in performance numbers if this pickle predates them.
        if "perfs" not in result:
            with open(args.perffile, "rb") as fin:
                """
                mean_products = pickle.load(fin)
                mean_products_list = []
                for nneg in nnegs:
                    cur_products_list = []
                    for method in methods:
                        cur_products_list.append(np.float32(mean_products[nneg][method][-1]))
                    mean_products_list.append(cur_products_list)
                """
                result['perfs'] = pickle.load(fin)
        #perfs = readPerfs(args.perffile)
        # Whitelist the configurations to display (all at dim 100).
        whitelist = []
        for method, nneg, dim in product(result['methods'], result['nnegs'],
                                         result['dims']):
            if dim == 100:
                if method in ['hole', 'complex', 'distmult']:
                    whitelist.append("%s.n%d.d%d" % (method, nneg, dim))
                elif method in ['transe', 'stranse'] and nneg in [1]:
                    whitelist.append("%s.n%d.d%d" % (method, nneg, dim))
                elif method in ['transr']:
                    if nneg == 1:
                        whitelist.append("%s.n%d.d%d" % (method, nneg, dim))
                    # NOTE(review): this elif is redundant — dim == 100 is
                    # already guaranteed by the outer guard, so every transr
                    # configuration is whitelisted.
                    elif dim == 100:
                        whitelist.append("%s.n%d.d%d" % (method, nneg, dim))
        if args.geometry in ['length']:
            plotConePerf(methods, nnegs, dims, result, outputfile,
                         xlabel="length", whitelist=whitelist, show=True)
        else:
            plotConePerf(methods, nnegs, dims, result, outputfile,
                         xlabel="conicity", whitelist=whitelist, show=True)