def evalCorrAMI(root):
    """Compute the four signal/reconstruction correlations and their AMI contrast.

    Relies on utils.getNames(root) returning file names in this order:
    [0] Axx.json, [1] Bxx.json, [2] AX_BX_clip, [3] AX_BX_rec_a, [4] AX_BX_rec_b.

    Returns a 7-tuple:
        (corr_a_ap, corr_b_ap, corr_a_bp, corr_b_bp,
         AMI, corr_a_ap - corr_b_ap, corr_b_bp - corr_a_bp)
    where AMI = corr_a_ap - corr_b_ap - corr_a_bp + corr_b_bp.
    """
    names = utils.getNames(root)

    # Load the two original signals and their two reconstructions.
    signal_a = utils.json2numpy(root + "/" + names[0])
    rec_a = utils.json2numpy(root + "/" + names[3])
    signal_b = utils.json2numpy(root + "/" + names[1])
    rec_b = utils.json2numpy(root + "/" + names[4])

    # Correlate every signal against every reconstruction.
    corr_a_ap = Corr(signal_a, rec_a)
    corr_b_ap = Corr(signal_b, rec_a)
    corr_a_bp = Corr(signal_a, rec_b)
    corr_b_bp = Corr(signal_b, rec_b)

    # AMI: matched-pair correlations minus cross-pair correlations.
    ami = corr_a_ap - corr_b_ap - corr_a_bp + corr_b_bp

    return (corr_a_ap, corr_b_ap, corr_a_bp, corr_b_bp, ami,
            corr_a_ap - corr_b_ap, corr_b_bp - corr_a_bp)
#Open a memory mapped distance matrix. #We do this because the pairwise distance matrix for 100 targets does not fit in memory. #It is nearly 100% dense and has 117747*117747 = 13864356009 elements. This is also #Why it uses float16 (reducing the required storage space to ~26GB, c.f. 52GB for float32). distance_matrix = np.memmap('./processed_data/graph_fp_comparison/distMat.dat', dtype=np.float16, shape=(x_.shape[0], x_.shape[0])) #build the hierarchical clustering tree: clusterer = ParisClusterer(x_.toarray()) clusterer.buildAdjacency() clusterer.fit() #Fingerprints to be compared: fp_names = utils.getNames() fp_dict = {} #this stores the actual fingerprint feature matrices fp_ap_before_trim = {} #this stores the average precisions fp_ap_after_trim = {} #this stores the average precisions #Load up the dictionaries with the relevant feature matrices for each fingerprint: for fp in fp_names: print(fp) featureMatrix, labels = utils.load_feature_and_label_matrices(type=fp) featureMatrix_, labels__ = utils.get_subset(featureMatrix, y, indices=col_indices) fp_dict[fp]=sparse.csr_matrix(featureMatrix_) fp_ap_before_trim[fp] = [] fp_ap_after_trim[fp] = [] #Store all the results in these:
import utils
from sklearn.metrics import precision_score, recall_score, roc_auc_score, label_ranking_average_precision_score
from sklearn.metrics import label_ranking_loss, confusion_matrix, average_precision_score, auc, precision_recall_curve
import statsmodels.api as sm
from statsmodels.distributions.empirical_distribution import ECDF
from tqdm import tqdm
from seaborn import kdeplot
import pymc3 as pm

## Set plotting parameters:
utils.set_mpl_params()
fp_names = utils.getNames(short=False)

# Human-readable fingerprint labels for plot axes (embedded \n keeps tick labels narrow).
nicenames = {
    'morgan': 'Morgan',
    '2dpharm': '2D\nPharm.',
    'atom_pair': 'Atom Pair',
    'erg': 'ErG',
    'cats': 'CATS',
    'layered': 'Layered',
    'maccs': 'MACCS',
    'morgan_feat': 'Morgan\nFeat.',
    'pattern': 'Pattern',
    'rdk': 'RDK',
    'topo_torsion': 'Topol.\nTorsion',
}

# Average precisions per fingerprint, filled in later.
fp_aps_before = {}
import utils
import random

# Source data pools, loaded once at import time; shuffleDatas() reshuffles them in place
# so each generated batch of fake users pairs the fields differently.
NAMES = utils.getNames()
EMAILS = utils.getEmails()
CITY = utils.getCity()
STATE = utils.getState()
PHONE = utils.getPhone()

# Accumulates the generated user records — presumably appended to inside the
# menu-option handlers below this chunk; verify against the continuation.
DATAS_USERS = []


def shuffleDatas():
    """Shuffle every source data pool in place to randomize field pairings."""
    random.shuffle(NAMES)
    random.shuffle(EMAILS)
    random.shuffle(CITY)
    random.shuffle(STATE)
    random.shuffle(PHONE)


def controller():
    """Main interactive loop: show the menu, read comma-separated option numbers,
    and dispatch each one (dispatch code continues past this chunk)."""
    utils.showProgramName()
    utils.line()
    # ---------------
    stop = False
    while stop == False:
        utils.menuOptions()
        # Reshuffle before every menu round so repeated generations differ.
        shuffleDatas()
        optionUser = str(input('Selecione uma ou mais opções:'))
        # The user may enter several options at once, e.g. "1,3".
        optionUser = optionUser.split(',')
        for option in optionUser: