Example no. 1
0
def evalCorrAMI(root):
    """Compute correlations between originals and reconstructions, plus an AMI score.

    Expects utils.getNames(root) to return filenames in this order
    (presumably — confirm against utils.getNames):
      [0] Axx.json, [1] Bxx.json, [2] AX_BX_clip, [3] AX_BX_rec_a, [4] AX_BX_rec_b

    Returns a 7-tuple:
      (corr_a_ap, corr_b_ap, corr_a_bp, corr_b_bp, AMI,
       corr_a_ap - corr_b_ap, corr_b_bp - corr_a_bp)
    """
    names = utils.getNames(root)

    # Load the two originals and their respective reconstructions.
    a_orig = utils.json2numpy(root + "/" + names[0])
    b_orig = utils.json2numpy(root + "/" + names[1])
    a_rec = utils.json2numpy(root + "/" + names[3])
    b_rec = utils.json2numpy(root + "/" + names[4])

    # Correlate each original against each reconstruction.
    corr_a_ap = Corr(a_orig, a_rec)
    corr_b_ap = Corr(b_orig, a_rec)
    corr_a_bp = Corr(a_orig, b_rec)
    corr_b_bp = Corr(b_orig, b_rec)

    # AMI contrasts matched pairs against mismatched ones.
    ami = corr_a_ap - corr_b_ap - corr_a_bp + corr_b_bp

    return (corr_a_ap, corr_b_ap, corr_a_bp, corr_b_bp, ami,
            corr_a_ap - corr_b_ap, corr_b_bp - corr_a_bp)
# Open a memory-mapped distance matrix.
# We do this because the pairwise distance matrix for 100 targets does not fit in memory.
# It is nearly 100% dense and has 117747*117747 = 13864356009 elements. This is also
# why it uses float16 (reducing the required storage space to ~26GB, c.f. 52GB for float32).
# NOTE(review): no mode= argument is given, so numpy's default ('r+', read-write on an
# existing file) applies — confirm distMat.dat already exists at this path.
distance_matrix = np.memmap('./processed_data/graph_fp_comparison/distMat.dat', dtype=np.float16,
              shape=(x_.shape[0], x_.shape[0]))



# Build the hierarchical clustering tree from the feature matrix.
# NOTE(review): x_.toarray() suggests x_ is a scipy sparse matrix that gets
# densified here — confirm it fits in memory at this size.
clusterer = ParisClusterer(x_.toarray())
clusterer.buildAdjacency()
clusterer.fit()

# Fingerprints to be compared:
fp_names = utils.getNames()

# Per-fingerprint stores: feature matrices and average-precision accumulators.
fp_dict = {}
fp_ap_before_trim = {}
fp_ap_after_trim = {}

# Populate the stores with the relevant feature matrix for each fingerprint:
for name in fp_names:
    print(name)
    features, labels = utils.load_feature_and_label_matrices(type=name)
    features_subset, _ = utils.get_subset(features, y, indices=col_indices)
    fp_dict[name] = sparse.csr_matrix(features_subset)
    fp_ap_before_trim[name] = []
    fp_ap_after_trim[name] = []

#Store all the results in these:
import utils

from sklearn.metrics import precision_score, recall_score, roc_auc_score, label_ranking_average_precision_score
from sklearn.metrics import label_ranking_loss, confusion_matrix, average_precision_score, auc, precision_recall_curve

import statsmodels.api as sm
from statsmodels.distributions.empirical_distribution import ECDF

from tqdm import tqdm
from seaborn import kdeplot
import pymc3 as pm

##Set plotting parameters:
utils.set_mpl_params()  # project helper; presumably configures matplotlib rcParams — confirm in utils
fp_names = utils.getNames(short=False)  # short=False suggests long-form names — verify against utils.getNames

# Human-readable display labels for each fingerprint
# (embedded '\n' wraps the longer labels on plot axes).
nicenames = {
    'morgan': 'Morgan',
    '2dpharm': '2D\nPharm.',
    'atom_pair': 'Atom Pair',
    'erg': 'ErG',
    'cats': 'CATS',
    'layered': 'Layered',
    'maccs': 'MACCS',
    'morgan_feat': 'Morgan\nFeat.',
    'pattern': 'Pattern',
    'rdk': 'RDK',
    'topo_torsion': 'Topol.\nTorsion',
}

# Average precisions keyed by fingerprint, filled in later.
fp_aps_before = {}
import utils
import random

# Pools of sample personal data, loaded via the project's utils module
# (contents are opaque here — presumably lists of strings; verify in utils).
NAMES = utils.getNames()
EMAILS = utils.getEmails()
CITY = utils.getCity()
STATE = utils.getState()
PHONE = utils.getPhone()

# Accumulator for generated user records (presumably appended to elsewhere).
DATAS_USERS = []


def shuffleDatas():
    """Shuffle every data pool in place so each generated user draws fresh values."""
    for pool in (NAMES, EMAILS, CITY, STATE, PHONE):
        random.shuffle(pool)


def controller():
    utils.showProgramName()
    utils.line()
    # ---------------
    stop = False
    while stop == False:
        utils.menuOptions()
        shuffleDatas()
        optionUser = str(input('Selecione uma ou mais opções:'))
        optionUser = optionUser.split(',')
        for option in optionUser: