コード例 #1
0
def request_analysis_batch():
    """Run a batch analysis for the URLs supplied in the JSON request body.

    The body must be a JSON object containing:

    * ``url`` -- list of URLs (send an empty list when only ``file`` is used).
    * ``api_key`` -- forwarded unchanged to ``Analyser.request_analysis``.
    * ``file`` (optional) -- XML string; the URLs that ``process_xml``
      extracts from it are appended to ``url``.
    * ``mode`` (optional) -- ``"domain"`` or ``"subdomain"`` is forwarded as
      ``dup_mode``; ``"default"`` forwards no ``dup_mode``.
    * ``ignore`` (optional) -- forwarded as ``ignore_errors`` when non-empty.

    Returns:
        A 200 response wrapping the analyser result, a 400 response when the
        body is not JSON or the parameters are invalid, or a 405 response for
        any method other than POST.
    """
    # JSON control.
    if not check_json(request):
        return make_response(
            jsonify({"message": "Request body must be JSON."}), 400)

    # 405
    if request.method != 'POST':
        return Response(status=405)

    json_data = request.get_json()
    # A JSON array/scalar/null body is valid JSON but has no keys; reject it
    # here instead of crashing on the subscripts below.
    if (not isinstance(json_data, dict) or 'url' not in json_data
            or 'api_key' not in json_data):
        return make_response(jsonify({"message": "Invalid parameters."}), 400)

    url = json_data['url']
    api_key = json_data['api_key']

    # Check if a file has been sent. If it's an XML, get the URLs from there
    # and append them to the url list. For XML uploading the client is
    # expected to send an empty array as 'url'.
    if 'file' in json_data:
        if not isinstance(url, list):
            # Nothing to extend: 'url' was sent as a non-array value.
            return make_response(jsonify({"message": "Invalid parameters."}),
                                 400)
        url.extend(process_xml(json_data['file']))

    analyser = Analyser()

    # NOTE(review): as in the original, 'ignore' is only honoured when 'mode'
    # is supplied; confirm whether requests without 'mode' should forward it.
    options = {}
    if 'mode' in json_data:
        mode = json_data['mode']
        if mode == "domain":
            options['dup_mode'] = 'domain'
        elif mode == "subdomain":
            options['dup_mode'] = 'subdomain'
        elif mode != "default":
            return make_response(jsonify({"message": "Invalid parameters."}),
                                 400)

        # Missing, null and empty 'ignore' all mean "nothing to ignore".
        ignore = json_data.get('ignore')
        if ignore:
            options['ignore_errors'] = ignore

    result = analyser.request_analysis(url, api_key, "batch", **options)
    return make_response(result, 200)
コード例 #2
0
def request_analysis():
    """Run a single-URL analysis described by the JSON request body.

    The body must be a JSON object containing:

    * ``url`` and ``api_key`` -- forwarded unchanged to
      ``Analyser.request_analysis``.
    * ``mode`` (optional) -- ``"domain"`` or ``"subdomain"`` is forwarded as
      ``dup_mode``; ``"default"`` forwards no ``dup_mode``.
    * ``ignore`` (optional) -- forwarded as ``ignore_errors`` when non-empty.

    Returns:
        A 200 response wrapping the analyser result, a 400 response when the
        body is not JSON or the parameters are invalid, or a 405 response for
        any method other than POST.
    """
    # JSON control.
    if not check_json(request):
        return make_response(
            jsonify({"message": "Request body must be JSON."}), 400)

    # 405
    if request.method != 'POST':
        return Response(status=405)

    json_data = request.get_json()
    # A JSON array/scalar/null body is valid JSON but has no keys; reject it
    # here instead of crashing on the subscripts below.
    if (not isinstance(json_data, dict) or 'url' not in json_data
            or 'api_key' not in json_data):
        return make_response(jsonify({"message": "Invalid parameters."}), 400)

    url = json_data['url']
    api_key = json_data['api_key']

    analyser = Analyser()

    # NOTE(review): as in the original, 'ignore' is only honoured when 'mode'
    # is supplied; confirm whether requests without 'mode' should forward it.
    options = {}
    if 'mode' in json_data:
        mode = json_data['mode']
        if mode == "domain":
            options['dup_mode'] = 'domain'
        elif mode == "subdomain":
            options['dup_mode'] = 'subdomain'
        elif mode != "default":
            # %-formatting keeps the exact text produced for string modes
            # but no longer raises TypeError when 'mode' is a number, null,
            # list or object.
            return make_response(
                jsonify({"message": "Invalid parameters.%s" % (mode, )}), 400)

        # Missing, null and empty 'ignore' all mean "nothing to ignore".
        ignore = json_data.get('ignore')
        if ignore:
            options['ignore_errors'] = ignore

    result = analyser.request_analysis(url, api_key, "single", **options)
    return make_response(result, 200)
コード例 #3
0
def negAnalysis(args):
    """Bar-plot a per-method statistic against the number of negative samples.

    For every embedding dimension in ``dims`` the function works in one of two
    modes, selected by ``args.result``:

    * falsy -- load every ``<dataname>.<method>.n<nneg>.d<dim>.p`` model from
      ``args.mdir``, query ``Analyser.getInnerProducts`` for sampled entity
      indices, plot the last element of the returned means with ``plotBars``
      and pickle the collected values to ``<opdir>/<dataname>.d<dim>.p``;
    * truthy -- reload that pickle and only redraw the plot.

    Args:
        args: Namespace providing ``dataname``, ``mdir``, ``opdir`` and
            ``result``.
    """
    methods = ['transe', 'transr', 'stranse', 'distmult', 'hole', 'complex']
    nnegs = [1, 50, 100]
    dims = [50, 100]
    n_samples = 100
    for dim in dims:
        outputfile = os.path.join(args.opdir,
                                  "%s.d%d" % (args.dataname, dim))
        if args.result:
            # pickle can execute arbitrary code while loading; only read
            # dumps that this script produced itself.
            with open(outputfile + ".p", "rb") as fin:
                mean_products_list = pickle.load(fin)['mean_products_list']
        else:
            mean_products = {}
            mean_products_list = []
            for nneg in nnegs:
                row = []
                for method in methods:
                    modelfile = os.path.join(
                        args.mdir, "%s.%s.n%d.d%d.p" %
                        (args.dataname, method, nneg, dim))
                    datafile = os.path.join(
                        args.mdir, "%s.%s.bin" % (args.dataname, method))
                    analyser = Analyser(datafile, modelfile, usePR=False)
                    e_ranges = [((0, 100), n_samples),
                                ((100, 500), n_samples),
                                ((500, 5000), n_samples),
                                ((5000, analyser.t.ne), n_samples)]
                    ent_indices = analyser.getEntIdxs(e_ranges)
                    # Only the second returned value is used below.
                    _, mgp = analyser.getInnerProducts(ent_indices,
                                                       sampleMean=False,
                                                       ent=True,
                                                       normalized=True)
                    # Parenthesised single-argument print behaves the same
                    # on Python 2 and Python 3.
                    print("%s\tneg %d" % (method, nneg))
                    print(mgp)
                    mean_products.setdefault(nneg, {})[method] = np.array(
                        mgp, dtype=np.float32)
                    row.append(np.float32(mgp[-1]))
                mean_products_list.append(row)

        plotBars(mean_products_list,
                 xlabel="#negatives",
                 ylabel="Avg MeanProduct",
                 legends=methods,
                 xticks=nnegs,
                 outfile=outputfile,
                 show=False)

        if not args.result:
            # Freshly computed values are stored after plotting, as before.
            with open(outputfile + ".p", "wb") as fout:
                pickle.dump(
                    {
                        "mean_products": mean_products,
                        "mean_products_list": mean_products_list,
                        "methods": methods,
                        "nnegs": nnegs,
                        "dim": dim
                    }, fout)
コード例 #4
0
def negAnalysis(args):
    """Collect or plot a per-method statistic against the negative-sample count.

    For every embedding dimension in ``dims`` the function works in one of two
    modes, selected by ``args.result``:

    * falsy -- for each method / negative-sample count, load the model
      ``<dataname>.<method>.n<nneg>.d<dim>.p`` from ``args.mdir`` and query the
      ``Analyser`` (``getLengths`` when ``args.geometry`` is ``'length'``,
      otherwise ``getInnerProducts``) for sampled entity indices
      (``args.type == 'ent'``) or relation indices (any other type). Models
      whose file is missing are printed and recorded as zeros. The values are
      pickled to ``<opdir>/<geometry>/<type>.<dataname>.d<dim>.p``; nothing
      is plotted in this mode.
    * truthy -- reload that pickle and draw it with ``plotBars``.

    Args:
        args: Namespace providing ``dataname``, ``mdir``, ``opdir``, ``type``,
            ``geometry`` and ``result``.
    """
    methods = ['transe', 'transr', 'stranse', 'distmult', 'hole', 'complex']
    nnegs = [1, 50, 100]
    dims = [50, 100]
    n_samples = 100
    # Both switches are constant for the whole run, so evaluate them once.
    use_ent = args.type in ['ent']
    use_length = args.geometry in ['length']

    for dim in dims:
        outputfile = os.path.join(
            args.opdir, args.geometry,
            "%s.%s.d%d" % (args.type, args.dataname, dim))

        if args.result:
            # pickle can execute arbitrary code while loading; only read
            # dumps that this script produced itself.
            with open(outputfile + ".p", "rb") as fin:
                result = pickle.load(fin)
            plotBars(result['mean_products_list'],
                     xlabel="#NegativeSamples",
                     ylabel='length' if use_length else 'conicity',
                     legends=methods,
                     xticks=nnegs,
                     outfile=outputfile,
                     show=False)
            continue

        mean_products = {}
        mean_products_list = []
        for nneg in nnegs:
            row = []
            for method in methods:
                modelfile = os.path.join(
                    args.mdir,
                    "%s.%s.n%d.d%d.p" % (args.dataname, method, nneg, dim))
                if not os.path.exists(modelfile):
                    # Missing model: report it and store zero placeholders so
                    # the per-method columns stay aligned.
                    print(modelfile)
                    n_bins = 5 if use_ent else 4
                    mean_products.setdefault(nneg, {})[method] = np.zeros(
                        n_bins, dtype=np.float32)
                    row.append(np.float32(0.0))
                    continue

                datafile = os.path.join(
                    args.mdir, "%s.%s.bin" % (args.dataname, method))
                analyser = Analyser(datafile, modelfile, usePR=False)

                if use_ent:
                    ranges = [((0, 100), n_samples),
                              ((100, 500), n_samples),
                              ((500, 5000), n_samples),
                              ((5000, analyser.t.ne), n_samples)]
                    indices = analyser.getEntIdxs(ranges)
                else:
                    if args.dataname in ['wn18']:
                        ranges = [((0, 3), 3), ((3, 10), 7),
                                  ((10, analyser.t.nr), analyser.t.nr - 10)]
                    else:
                        ranges = [((0, 100), n_samples),
                                  ((100, 500), n_samples),
                                  ((500, analyser.t.nr), n_samples)]
                    indices = analyser.getRelIdxs(ranges)

                # Only the second returned value is used below.
                if use_length:
                    _, mgp = analyser.getLengths(indices, ent=use_ent)
                else:
                    _, mgp = analyser.getInnerProducts(indices,
                                                       sampleMean=True,
                                                       ent=use_ent,
                                                       normalized=True)
                # Parenthesised single-argument print behaves the same on
                # Python 2 and Python 3.
                print("%s\tneg %d" % (method, nneg))
                print(mgp)
                mean_products.setdefault(nneg, {})[method] = np.array(
                    mgp, dtype=np.float32)
                row.append(np.float32(mgp[-1]))
            mean_products_list.append(row)

        with open(outputfile + ".p", "wb") as fout:
            pickle.dump(
                {
                    "mean_products": mean_products,
                    "mean_products_list": mean_products_list,
                    "methods": methods,
                    "nnegs": nnegs,
                    "dim": dim
                }, fout)
コード例 #5
0
def typeAnalysis(args):
    """Collect or plot per-method value distributions for one dataset.

    The method table is ``best_methods`` when ``args.best`` is truthy (in
    which case ``args.opdir`` is also redirected to its ``best``
    sub-directory), otherwise ``uniform_methods``. ``args.result`` selects
    the mode:

    * falsy -- for each method configured for ``args.dataname``, load its
      model from ``args.mdir``, query the ``Analyser`` (``getLengths`` when
      ``args.geometry`` is ``'length'``, otherwise ``getInnerProducts``) for
      sampled entity indices (``args.type == 'ent'``) or relation indices,
      and pickle the first returned value per method together with the range
      labels to ``<opdir>/<geometry>/<type>.<dataname>.p``;
    * truthy -- reload that pickle and draw one ``plotDistribution`` figure
      per method.

    Args:
        args: Namespace providing ``best``, ``opdir``, ``mdir``, ``dataname``,
            ``type``, ``geometry``, ``result`` and ``combined``.
    """
    if args.best:
        methods = best_methods
        # Side effect kept from the original: callers see the new opdir.
        args.opdir = os.path.join(args.opdir, "best")
    else:
        methods = uniform_methods

    use_ent = args.type in ['ent']
    use_length = args.geometry in ['length']
    outputfile = os.path.join(args.opdir, args.geometry,
                              "%s.%s" % (args.type, args.dataname))

    if not args.result:
        n_samples = 100
        mean_products = {}
        # .items() iterates the same pairs as .iteritems() on Python 2 and
        # also exists on Python 3.
        for method, vals in methods[args.dataname].items():
            nneg = vals['nneg']
            dim = vals['dim']
            modelfile = os.path.join(
                args.mdir,
                "%s.%s.n%d.d%d.p" % (args.dataname, method, nneg, dim))
            datafile = os.path.join(args.mdir,
                                    "%s.%s.bin" % (args.dataname, method))
            analyser = Analyser(datafile, modelfile, usePR=False)

            if use_ent:
                ranges = [((0, 100), n_samples), ((100, 500), n_samples),
                          ((500, 5000), n_samples),
                          ((5000, analyser.t.ne), n_samples)]
                indices = analyser.getEntIdxs(ranges)
            else:
                if args.dataname in ['wn18']:
                    ranges = [((0, 3), 3), ((3, 10), 3),
                              ((10, analyser.t.nr), 3)]
                else:
                    ranges = [((0, 100), n_samples),
                              ((100, 500), n_samples),
                              ((500, analyser.t.nr), n_samples)]
                indices = analyser.getRelIdxs(ranges)

            # One "<start>-<end>" label per range; the labels from the last
            # processed method are the ones written to the pickle.
            legendLabels = ["%d-%d" % (bounds[0], bounds[1])
                            for bounds, _ in ranges]

            if use_length:
                gp, mgp = analyser.getLengths(indices, ent=use_ent)
            else:
                gp, mgp = analyser.getInnerProducts(indices,
                                                    sampleMean=True,
                                                    ent=use_ent,
                                                    normalized=True)
            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print("%s\tneg %d" % (method, nneg))
            print(mgp)
            mean_products[method] = {
                "nneg": nneg,
                "dim": dim,
                "gp": np.array(gp, dtype=np.float32)
            }

        with open(outputfile + ".p", "wb") as fout:
            pickle.dump(
                {
                    "mean_products": mean_products,
                    "methods": methods[args.dataname],
                    "legendLabels": legendLabels
                }, fout)
    else:
        # pickle can execute arbitrary code while loading; only read dumps
        # that this script produced itself.
        with open(outputfile + ".p", "rb") as fin:
            result = pickle.load(fin)
        mean_products = result['mean_products']
        legendLabels = result['legendLabels']
        # Relabel the last range from "<start>-<end>" to "above <start>".
        x0 = legendLabels[-1].split("-")[0]
        legendLabels[-1] = "above %s" % x0

        xlabel = 'length' if use_length else 'atm'
        for method, vals in mean_products.items():
            figname = "%s.%s.%s.n%d.d%d" % (args.type, args.dataname, method,
                                            vals['nneg'], vals['dim'])
            if args.combined:
                outputfile = os.path.join(args.opdir, "combined",
                                          args.geometry, figname)
            else:
                outputfile = os.path.join(args.opdir, args.geometry, figname)
            plotDistribution(vals['gp'],
                             xlabel=xlabel,
                             ylabel="Density",
                             legends=legendLabels,
                             modelName=method,
                             outfile=outputfile,
                             show=False,
                             combined=args.combined)
コード例 #6
0
def perfAnalysis(args):
    """Collect per-model statistics, or plot them against model performance.

    ``args.result`` selects the mode:

    * falsy -- for every (dim, nneg, method) combination, load the model from
      ``args.mdir``, query the ``Analyser`` (``getLengths`` when
      ``args.geometry`` is ``'length'``, otherwise ``getInnerProducts``) for
      sampled entity indices (``args.type == 'ent'``) or relation indices,
      and pickle the results to ``<opdir>/<geometry>/<type>.<dataname>.p``.
      Nothing is plotted in this mode.
    * truthy -- reload that pickle, add ``perfs`` from ``args.perffile`` when
      the pickle does not already contain it, build the whitelist of model
      names and call ``plotConePerf``.

    Args:
        args: Namespace providing ``dataname``, ``mdir``, ``opdir``, ``type``,
            ``geometry``, ``result`` and (in plot mode) ``perffile``.
    """
    methods = ['transe', 'transr', 'stranse', 'distmult', 'hole', 'complex']
    nnegs = [1, 50, 100]
    dims = [50, 100]
    use_ent = args.type in ['ent']
    use_length = args.geometry in ['length']
    outputfile = os.path.join(args.opdir, args.geometry,
                              "%s.%s" % (args.type, args.dataname))

    if not args.result:
        n_samples = 100
        mean_products = {}
        name_conicity = {}
        # product() varies its last argument fastest, which reproduces the
        # original dim -> nneg -> method nesting order.
        for dim, nneg, method in product(dims, nnegs, methods):
            modelfile = os.path.join(
                args.mdir,
                "%s.%s.n%d.d%d.p" % (args.dataname, method, nneg, dim))
            datafile = os.path.join(args.mdir,
                                    "%s.%s.bin" % (args.dataname, method))
            analyser = Analyser(datafile, modelfile, usePR=False)

            if use_ent:
                ranges = [((0, 100), n_samples), ((100, 500), n_samples),
                          ((500, 5000), n_samples),
                          ((5000, analyser.t.ne), n_samples)]
                indices = analyser.getEntIdxs(ranges)
            else:
                if args.dataname in ['wn18']:
                    ranges = [((0, 3), 3), ((3, 10), 7),
                              ((10, analyser.t.nr), analyser.t.nr - 10)]
                else:
                    ranges = [((0, 100), n_samples),
                              ((100, 500), n_samples),
                              ((500, analyser.t.nr), n_samples)]
                indices = analyser.getRelIdxs(ranges)

            # Only the second returned value is used below.
            if use_length:
                _, mgp = analyser.getLengths(indices, ent=use_ent)
            else:
                _, mgp = analyser.getInnerProducts(indices,
                                                   sampleMean=True,
                                                   ent=use_ent,
                                                   normalized=True)
            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print("%s\tneg %d" % (method, nneg))
            print(mgp)
            mean_products.setdefault(dim, {}).setdefault(
                nneg, {})[method] = np.array(mgp, dtype=np.float32)
            mname = "%s.%s.n%d.d%d" % (args.dataname, method, nneg, dim)
            name_conicity[mname] = mgp[-1]

        with open(outputfile + ".p", "wb") as fout:
            pickle.dump(
                {
                    "mean_products": mean_products,
                    "methods": methods,
                    "nnegs": nnegs,
                    "dims": dims,
                    "name_conicity": name_conicity
                }, fout)
        return

    # pickle can execute arbitrary code while loading; only read dumps that
    # were produced by trusted runs of this tooling.
    with open(outputfile + ".p", "rb") as fin:
        result = pickle.load(fin)
    if "perfs" not in result:
        with open(args.perffile, "rb") as fin:
            result['perfs'] = pickle.load(fin)

    # Only d=100 models are whitelisted: every negative-sample count for
    # hole/complex/distmult/transr, and only nneg=1 for transe/stranse.
    # NOTE(review): the original transr branch tested nneg == 1 and then fell
    # back to an always-true dim == 100 check, so transr was whitelisted for
    # every nneg; that behaviour is preserved here -- confirm it is intended.
    always_listed = ['hole', 'complex', 'distmult', 'transr']
    single_neg_only = ['transe', 'stranse']
    whitelist = []
    for method, nneg, dim in product(result['methods'], result['nnegs'],
                                     result['dims']):
        if dim != 100:
            continue
        if method in always_listed or (method in single_neg_only
                                       and nneg == 1):
            whitelist.append("%s.n%d.d%d" % (method, nneg, dim))

    plotConePerf(methods,
                 nnegs,
                 dims,
                 result,
                 outputfile,
                 xlabel="length" if use_length else "conicity",
                 whitelist=whitelist,
                 show=True)