def _getmeans(self):
    """Map all directories and return (mean dG*, mean dG0).

    Called by fit_to_reference and _do_iteration. Runs mapall(),
    analyses the qfep outputs of every successfully mapped directory
    and averages the activation (dG*) and reaction (dG0) free
    energies over them.

    Raises:
        QMapperError: when every directory fails to map, or every
            mapped output fails to analyse.
    """
    self.mapall(_supress_info=True)

    # report (but tolerate) individual mapping failures
    for failed_dir, err in six.iteritems(self.failed):
        logger.info("Failed to map '{}': {}".format(failed_dir, err))

    if not self.mapped:
        raise QMapperError("All directories failed to map! Try changing "
                           "the initial-guess values (Hij and alpha) "
                           "or step_size... Also, check the mapping "
                           "parameters (skip, bins, ...).")

    activation_fes, reaction_fes = [], []
    for mapdir, (_, qfo_str) in six.iteritems(self.mapped):
        try:
            output = QFepOutput(qfo_str)
            activation_fes.append(output.part3.dga)
            reaction_fes.append(output.part3.dg0)
        except QFepOutputError as error_msg:
            logger.info("Failed to analyse '{}': {}"
                        "".format(mapdir, error_msg))
        except Exception as error_msg:
            logger.warning("Uncaught exception when analysing '{}': {}"
                           "".format(mapdir, error_msg))

    if not activation_fes or not reaction_fes:
        raise QMapperError("All directories failed to analyse! Try "
                           "changing the initial-guess values (Hij and "
                           "alpha) or step_size...")

    return stats.mean(activation_fes), stats.mean(reaction_fes)
def get_temp_stats(self, stride=1):
    """Return temperature stats as a formatted string.

    Used for the command-line printout; covers all logfiles combined.

    Args:
        stride (int): use every stride-th data point (default 1,
            passed through to get_temps)

    Returns:
        str: table with mean, standard deviation and maximum absolute
        deviation for T_tot, T_free, T_free_solute and T_free_solvent.
    """
    temps = self.get_temps(stride=stride)
    columns = temps.get_columns(("T_tot", "T_free",
                                 "T_free_solute", "T_free_solvent"))

    def _column_stats(values):
        # mean, stdev and maximum absolute deviation from the mean
        mean = stats.mean(values)
        max_dev = max(abs(v - mean) for v in values)
        return mean, stats.stdev(values), max_dev

    # one (label, mean, stdev, max_dev) quadruplet per temperature type,
    # flattened into the positional format arguments
    fields = ["", "Mean", "Stdev", "Max.Abs.Dev."]
    labels = ("T_total", "T_free", "T_free_solute", "T_free_solvent")
    for label, column in zip(labels, columns):
        mean, std, max_dev = _column_stats(column)
        fields.extend((label, mean, std, max_dev))

    outstr = """\
Temperature stats:
{0:20s}{1:>15s}{2:>15s}{3:>15s}
{4:20s}{5:>15.2f}{6:>15.2f}{7:>15.2f}
{8:20s}{9:>15.2f}{10:>15.2f}{11:>15.2f}
{12:20s}{13:>15.2f}{14:>15.2f}{15:>15.2f}
{16:20s}{17:>15.2f}{18:>15.2f}{19:>15.2f}
""".format(*fields)
    return outstr
def stats_str(self):
    """Free energy stats in string format."""

    def _summary(values):
        # (mean, stdev, median, standard error of the mean)
        return (stats.mean(values), stats.stdev(values),
                stats.median(values), stats.sem(values))

    dga_values = self.dgas.values()
    dg0_values = self.dg0s.values()
    fep_values = self.dgs_fep.values()

    allres = {"calc_type": self._subcalc_key or "",
              "dg_n": len(dga_values),
              "dga": _summary(dga_values),
              "dg0": _summary(dg0_values),
              "dg_fep_n": len(fep_values),
              "dg_fep": _summary(fep_values)}

    return """\
# {calc_type:<15} Mean Std.dev Median Std.error N
dG* {dga[0]:10.2f} {dga[1]:10.2f} {dga[2]:10.2f} {dga[3]:10.2f} {dg_n:10}
dG0 {dg0[0]:10.2f} {dg0[1]:10.2f} {dg0[2]:10.2f} {dg0[3]:10.2f} {dg_n:10}
dG_lambda {dg_fep[0]:10.2f} {dg_fep[1]:10.2f} {dg_fep[2]:10.2f} \
{dg_fep[3]:10.2f} {dg_fep_n:10}
""".format(**allres)
def lra_stats(self):
    """Calculate average and st.dev of LRA and reorg energies.

    Aggregates the per-directory LRA DataContainers in self.lras and
    returns a DataContainer with one row per energy type holding the
    mean and standard deviation of the (E2-E1) averages, LRA and
    reorganization energies over all directories.
    """
    average_lras = DataContainer(["E_type", "(E2-E1)_10_mean",
                                  "(E2-E1)_10_std", "(E2-E1)_01_mean",
                                  "(E2-E1)_01_std", "LRA_mean", "LRA_std",
                                  "REORG_mean", "REORG_std"])

    # group rows by position: grouped[i] collects the i-th row (same
    # energy type) from every directory's LRA container, e.g.
    # grouped[0] == [["EQtot", de_st1, de_st2, lra, reorg], ...]
    grouped = []
    for lra in self.lras.values():
        for irow, row in enumerate(lra.get_rows()):
            if irow < len(grouped):
                grouped[irow].append(row)
            else:
                grouped.append([row])

    for rows in grouped:
        # transpose so each column holds one quantity across all dirs:
        # columns[0] = energy-type labels, columns[1..4] = values
        columns = list(zip(*rows))
        e_type = columns[0][0]
        means = [stats.mean(col) for col in columns[1:5]]
        stdevs = [stats.stdev(col) for col in columns[1:5]]
        de_st1_mean, de_st2_mean, lra_mean, reo_mean = means
        de_st1_std, de_st2_std, lra_std, reo_std = stdevs
        average_lras.add_row([e_type,
                              de_st1_mean, de_st1_std,
                              de_st2_mean, de_st2_std,
                              lra_mean, lra_std,
                              reo_mean, reo_std])

    return average_lras
def calcall(self):
    """Run the GC calcs, update .gcs, .failed and .gcs_stats.

    Workflow:
      1. spawn one _QGroupContribThread per calculation directory,
         throttled by a BoundedSemaphore of self._nthreads;
      2. collect qcalc inputs/outputs (or errors) from each thread;
      3. parse every qcalc output for the "gc" results, build the
         per-residue LRA/reorganization DataContainer per directory
         (stored in self.gcs);
      4. aggregate mean/stdev over all directories into
         self.gcs_stats.

    Directories that fail at any step are recorded in self.failed and
    skipped. Honors self.kill_event by terminating the running qcalc
    process and returning early.
    """
    semaphore = threading.BoundedSemaphore(self._nthreads)
    self._qcalc_io.clear()
    self.gcs.clear()
    self.gcs_stats.delete_rows()
    self.failed.clear()

    threads = []
    for calcdir in self._calcdirs:
        threads.append(_QGroupContribThread(self, semaphore, calcdir))
        threads[-1].start()

    for t in threads:
        # NOTE: Thread.is_alive() -- the camelCase isAlive() alias was
        # removed in Python 3.9; is_alive() exists since Python 2.6.
        while t.is_alive():
            t.join(1.0)
            if self.kill_event.is_set():
                # best effort: kill the external qcalc process, ignore
                # failures (it may have finished already)
                try:
                    t.qcalc.process.terminate()
                except Exception:
                    pass
                return
        if t.error:
            self.failed[t.calcdir] = t.error
        else:
            self._qcalc_io[t.calcdir] = (t.qinps, t.qouts)

    # parse the output for results and
    # calculate LRAs for each dir
    for _dir, (_, qouts) in six.iteritems(self._qcalc_io):
        gcs = []
        failed_flag = False
        for qout in qouts:
            try:
                qco = QCalcOutput(qout)
                res = qco.results["gc"]
                if not self.qcalc_version:
                    self.qcalc_version = qco.qcalc_version
            except (QCalcError, KeyError) as error_msg:
                self.failed[_dir] = error_msg
                failed_flag = True
                break
            # per-output {resid: {"vdw": ..., "el": ...}} mapping
            gc = {}
            for row in res.get_rows():
                resid, vdw, el = int(row[0]), float(row[1]), float(row[2])
                gc[resid] = {"vdw": vdw, "el": el}
            gcs.append(gc)
        if failed_flag:
            continue

        resids = sorted(gcs[0].keys())
        resnames = [self._pdb_qstruct.residues[ri-1].name
                    for ri in resids]

        # do the LRA thingy
        # LRA = 0.5*(<E2-E1>_conf1+<E2-E1>_conf2)
        # REORG = <E2-E1>_conf1 - LRA
        e2e1_st1_vdw = [gcs[1][key]["vdw"] - gcs[0][key]["vdw"]
                        for key in resids]
        e2e1_st1_el = [gcs[1][key]["el"] - gcs[0][key]["el"]
                       for key in resids]
        e2e1_st2_vdw = [gcs[3][key]["vdw"] - gcs[2][key]["vdw"]
                        for key in resids]
        e2e1_st2_el = [gcs[3][key]["el"] - gcs[2][key]["el"]
                       for key in resids]

        # super efficient stuff here
        vdw_lra = [0.5*(a + b) for a, b in zip(e2e1_st1_vdw, e2e1_st2_vdw)]
        el_lra = [0.5*(a + b) for a, b in zip(e2e1_st1_el, e2e1_st2_el)]
        vdw_reorg = [0.5*(a - b) for a, b in zip(e2e1_st1_vdw, e2e1_st2_vdw)]
        el_reorg = [0.5*(a - b) for a, b in zip(e2e1_st1_el, e2e1_st2_el)]

        # scale the electrostatics of ionized residues
        # (only when the scaling factor is not effectively 1.0)
        if abs(self._scale_ionized - 1.0) > 1e-7:
            for i, resname in enumerate(resnames):
                if resname in ("ARG", "LYS", "HIP", "ASP", "GLU"):
                    e2e1_st1_el[i] = e2e1_st1_el[i] / self._scale_ionized
                    e2e1_st2_el[i] = e2e1_st2_el[i] / self._scale_ionized
                    el_lra[i] = el_lra[i] / self._scale_ionized
                    el_reorg[i] = el_reorg[i] / self._scale_ionized

        # write the DataContainer
        # NOTE(review): the "<E2-E1>2" column headers also use the
        # state-1 lambdas (lambda1_st1/lambda2_st1) -- looks suspicious,
        # confirm against self._lambdas_A/_lambdas_B semantics.
        lambda1_st1 = self._lambdas_A[0]
        lambda2_st1 = self._lambdas_B[0]
        gc_lra = DataContainer(["Residue_id", "Residue name",
                                "<E2-E1>1_VdW(l={:5.4f}->l={:5.4f})"
                                "".format(lambda1_st1, lambda2_st1),
                                "<E2-E1>1_El(l={:5.4f}->l={:5.4f})_(iscale={})"
                                "".format(lambda1_st1, lambda2_st1,
                                          self._scale_ionized),
                                "<E2-E1>2_VdW(l={:5.4f}->l={:5.4f})"
                                "".format(lambda1_st1, lambda2_st1),
                                "<E2-E1>2_El(l={:5.4f}->l={:5.4f})_(iscale={})"
                                "".format(lambda1_st1, lambda2_st1,
                                          self._scale_ionized),
                                "LRA_VdW(l={:5.4f}->l={:5.4f})"
                                "".format(lambda1_st1, lambda2_st1),
                                "LRA_El(l={:5.4f}->l={:5.4f})_(iscale={})"
                                "".format(lambda1_st1, lambda2_st1,
                                          self._scale_ionized),
                                "REORG_VdW(l={:5.4f}->l={:5.4f})"
                                "".format(lambda1_st1, lambda2_st1),
                                "REORG_El(l={:5.4f}->l={:5.4f})_(iscale={})"
                                "".format(lambda1_st1, lambda2_st1,
                                          self._scale_ionized)])

        for row in zip(resids, resnames, e2e1_st1_vdw, e2e1_st1_el,
                       e2e1_st2_vdw, e2e1_st2_el, vdw_lra, el_lra,
                       vdw_reorg, el_reorg):
            gc_lra.add_row(row)

        self.gcs[_dir] = gc_lra

    # get GC stats over all directories
    self.gcs_stats.delete_rows()
    gcs = {}
    for _, gc in six.iteritems(self.gcs):
        for row in gc.get_rows():
            resid, resname = row[0:2]
            res_key = "{}.{}".format(resid, resname)
            # values: one list per energy column, so equal columns from
            # different directories can be concatenated below
            values = [[val,] for val in row[2:]]
            if res_key not in gcs:
                gcs[res_key] = values
            else:
                for i, val in enumerate(gcs[res_key]):
                    val.extend(values[i])

    # iterate through each residue and calculate
    # means and stdevs
    # (sort by residue index)
    for res_key in sorted(gcs, key=lambda x: int(x.split(".")[0])):
        rc = gcs[res_key]
        resid, resname = res_key.split(".")
        # get mean and stdev
        rc_stats = [int(resid), resname, len(rc[0]),
                    stats.mean(rc[0]), stats.stdev(rc[0]),  # <E2-E1>1 vdw
                    stats.mean(rc[1]), stats.stdev(rc[1]),  # <E2-E1>1 el
                    stats.mean(rc[2]), stats.stdev(rc[2]),  # <E2-E1>2 vdw
                    stats.mean(rc[3]), stats.stdev(rc[3]),  # <E2-E1>2 el
                    stats.mean(rc[4]), stats.stdev(rc[4]),  # LRA vdw
                    stats.mean(rc[5]), stats.stdev(rc[5]),  # LRA el
                    stats.mean(rc[6]), stats.stdev(rc[6]),  # REORG vdw
                    stats.mean(rc[7]), stats.stdev(rc[7])]  # REORG el
        self.gcs_stats.add_row(rc_stats)
def test_mean(self):
    """Mean of the first twenty squares is 2870/20 == 143.5."""
    squares = [n * n for n in range(1, 21)]
    assert is_close(stats.mean(squares), 143.5)