コード例 #1
0
def two_axis_plot_compute():
    """Compute the two-axis scatter plot (or the page embedding it).

    All inputs come from the framework-provided ``request`` global:

    * ``request.args`` -- category ids, converted with ``ObjectId``. In
      non-PCA ``png`` mode the first two select the x and y columns.
    * ``request.vars.study`` -- study handed to ``mongo2pandas``.
    * ``request.vars.pca`` -- when truthy, the weighted data is projected
      onto two principal components; otherwise one weighted rating column
      is computed per category.
    * ``request.vars.cluster``, ``n_clusters``, ``use_categories`` --
      optional K-means clustering; the labels end up in column ``'c'``.
    * ``request.vars.type`` -- ``'html'`` returns ``dict(img=...)`` (plus
      ``ctable`` when clustering), ``'png'`` returns the result of
      ``mplt.two_axis_plot``. Any other value falls through and returns
      ``None``.

    Raises ``IndexError`` in non-PCA ``png`` mode when a requested category
    id is not found in ``dbm.categories``.
    """
    category_ids = [ObjectId(el) for el in request.args]
    d, w, _labels, cats = mongo2pandas(dbm, request.vars.study,
                                       {'category': {'$in': category_ids}})

    if request.vars.pca:
        # compute principal component analysis on the weighted data
        weighted = (d * w).values
        pca = PCA(n_components=2)
        pca.fit(weighted)
        pdata = pandas.DataFrame(index=d.index, columns=['x', 'y', 'c'])
        pdata[['x', 'y']] = pca.transform(weighted)
    else:
        pdata = pandas.DataFrame(index=d.index,
                                 columns=list(cats.keys()) + ['c'])
        # computing rating for categories
        for _cid in cats:
            mask = cats[_cid]
            cw = w[mask]
            # NOTE(review): the weights are normalised by the sum of the
            # first row only; presumably all rows of ``w`` are identical --
            # confirm against mongo2pandas.
            cw = cw / cw.sum(axis=1)[0]
            pdata[_cid] = (d[mask] * cw).sum(axis=1)
    # Cluster label column; stays 0 unless clustering is requested below.
    pdata['c'] = 0

    if request.vars.cluster:
        # K-means clustering
        k_means = KMeans(n_clusters=int(request.vars.n_clusters))
        if request.vars.use_categories and not request.vars.pca:
            # Cluster on the per-category ratings (everything except 'c').
            features = pdata[[c for c in pdata.columns if c != 'c']].values
        else:
            features = (d * w).values
        k_means.fit(features)
        pdata['c'] = k_means.labels_

    if request.vars.type == 'html':
        # The image URL forwards the same args/vars with type switched to
        # 'png', so the follow-up request renders the picture itself.
        request.vars['type'] = 'png'
        img = IMG(_src=URL(r=request, c='analyse', f='two_axis_plot_compute',
                           args=request.args, vars=request.vars))
        if not request.vars.cluster:
            return dict(img=img)

        # Create a table with one column per cluster for visualization;
        # shorter clusters are padded with empty strings.
        labels = k_means.labels_
        cluster_ids = np.unique(labels)
        sizes = np.bincount(labels)
        ctable = pandas.DataFrame(columns=np.arange(len(cluster_ids)),
                                  index=np.arange(sizes.max()), dtype=object)
        ctable.loc[:, :] = ''
        for idx, val in enumerate(cluster_ids):
            companies = sorted(d.index[labels == val])
            # Positional write: column labels are 0..k-1, so idx is also the
            # column position. Avoids chained assignment.
            ctable.iloc[:len(companies), idx] = companies
        return dict(img=img, ctable=ctable)

    elif request.vars.type == 'png':
        if request.vars.pca:
            return mplt.two_axis_plot(pdata, xlabel=u'Проекция 1', ylabel=u'Проекция 2',
                                      title=u'Визуализация методом главных компонентов')

        cats_db = list(dbm.categories.find({'_id': {'$in': category_ids}}))

        def title_of(arg):
            # List comprehension instead of filter(...)[0]: works on both
            # Python 2 and 3 and still raises IndexError when nothing matches.
            wanted = ObjectId(arg)
            return [doc for doc in cats_db if doc['_id'] == wanted][0]['title']

        return mplt.two_axis_plot(xval=pdata[request.args[0]],
                                  yval=pdata[request.args[1]], c=pdata['c'],
                                  xlabel=title_of(request.args[0]),
                                  ylabel=title_of(request.args[1]))
コード例 #2
0
def two_axis_plot_compute():
    """Produce the two-axis scatter plot or the HTML payload that embeds it.

    Inputs are read from the framework-provided ``request`` global:

    * ``request.args`` -- category ids (wrapped in ``ObjectId``); the first
      two pick the x/y columns in non-PCA ``png`` mode.
    * ``request.vars.study`` -- study passed to ``mongo2pandas``.
    * ``request.vars.pca`` -- truthy: plot a 2-component PCA projection of
      the weighted data; falsy: plot weighted per-category ratings.
    * ``request.vars.cluster`` with ``n_clusters`` and ``use_categories`` --
      optional K-means clustering whose labels are stored in column ``'c'``.
    * ``request.vars.type`` -- ``'html'`` yields ``dict(img=...)`` and, when
      clustering, ``ctable``; ``'png'`` yields what ``mplt.two_axis_plot``
      returns. Other values fall through and the function returns ``None``.

    Raises ``IndexError`` in non-PCA ``png`` mode if a requested category id
    has no document in ``dbm.categories``.
    """
    wanted_ids = [ObjectId(el) for el in request.args]
    d, w, _labels, cats = mongo2pandas(dbm, request.vars.study,
                                       {'category': {'$in': wanted_ids}})

    if request.vars.pca:
        # compute principal component analysis
        weighted_values = (d * w).values
        pca = PCA(n_components=2)
        pca.fit(weighted_values)
        pdata = pandas.DataFrame(index=d.index, columns=['x', 'y', 'c'])
        pdata[['x', 'y']] = pca.transform(weighted_values)
    else:
        columns = list(cats.keys())
        columns.append('c')
        pdata = pandas.DataFrame(index=d.index, columns=columns)
        # computing rating for categories
        for _cid in cats:
            mask = cats[_cid]
            cw = w[mask]
            # NOTE(review): divides by the first row's weight sum only;
            # presumably every row of ``w`` carries the same weights --
            # verify against mongo2pandas.
            cw = cw / cw.sum(axis=1)[0]
            pdata[_cid] = (d[mask] * cw).sum(axis=1)
    # Default cluster label; overwritten when clustering is requested.
    pdata['c'] = 0

    if request.vars.cluster:
        # K-means clustering
        k_means = KMeans(n_clusters=int(request.vars.n_clusters))
        if request.vars.use_categories and not request.vars.pca:
            rating_columns = [c for c in pdata.columns if c != 'c']
            k_means.fit(pdata[rating_columns].values)
        else:
            k_means.fit((d * w).values)
        pdata['c'] = k_means.labels_

    if request.vars.type == 'html':
        # Forward the same request with type=png as the image source.
        request.vars['type'] = 'png'
        img = IMG(_src=URL(r=request,
                           c='analyse',
                           f='two_axis_plot_compute',
                           args=request.args,
                           vars=request.vars))
        if request.vars.cluster:
            # Create a table with different cluster for visualization:
            # one column per cluster, rows padded with '' up to the size of
            # the largest cluster.
            labels = k_means.labels_
            unique_labels = np.unique(labels)
            bins = np.bincount(labels)
            ctable = pandas.DataFrame(columns=np.arange(len(unique_labels)),
                                      index=np.arange(bins.max()),
                                      dtype=object)
            ctable.loc[:, :] = ''
            for idx, val in enumerate(unique_labels):
                companies = sorted(d.index[labels == val])
                # Single positional assignment instead of chained
                # ``ctable[idx][:n] = ...``; columns are labelled 0..k-1 so
                # position and label coincide.
                ctable.iloc[:len(companies), idx] = companies

            return dict(img=img, ctable=ctable)
        else:
            return dict(img=img)

    elif request.vars.type == 'png':
        if request.vars.pca:
            return mplt.two_axis_plot(
                pdata,
                xlabel=u'Проекция 1',
                ylabel=u'Проекция 2',
                title=u'Визуализация методом главных компонентов')
        else:
            cats_db = list(dbm.categories.find({'_id': {'$in': wanted_ids}}))

            def axis_title(arg):
                # Subscript a list, not ``filter(...)``: on Python 3 filter
                # returns an iterator. IndexError on no match is preserved.
                target = ObjectId(arg)
                matches = [doc for doc in cats_db if doc['_id'] == target]
                return matches[0]['title']

            return mplt.two_axis_plot(
                xval=pdata[request.args[0]],
                yval=pdata[request.args[1]],
                c=pdata['c'],
                xlabel=axis_title(request.args[0]),
                ylabel=axis_title(request.args[1]))
コード例 #3
0
def bar_plot_compute():
    """Return the bar plot for the data columns named in ``request.args``.

    Loads the study given by ``request.vars.study`` through ``mongo2pandas``
    and passes the selected columns together with the labels to
    ``mplt.bar_plot``. The weights and categories that ``mongo2pandas`` also
    returns are not used here.
    """
    frame, _weights, captions, _categories = mongo2pandas(
        dbm, request.vars.study)
    wanted_columns = list(request.args)
    selection = frame[wanted_columns]
    return mplt.bar_plot(selection, captions)
コード例 #4
0
def bar_plot_compute():
    """Plot the request-selected columns of a study as a bar chart.

    ``request.vars.study`` identifies the study loaded via ``mongo2pandas``;
    ``request.args`` lists the columns to plot. Only the data and the labels
    from the loaded tuple are forwarded to ``mplt.bar_plot``.
    """
    study = request.vars.study
    data, _w, plot_labels, _cats = mongo2pandas(dbm, study)
    column_names = list(request.args)
    return mplt.bar_plot(data[column_names], plot_labels)