Python doStats Examples

Programming Language: Python

Namespace/Package Name: stats

Method/Function: doStats

Examples at hotexamples.com: 5

Python doStats - 5 examples found. These are the top rated real world Python examples of stats.doStats extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: Gibbs_V3.py Project: nvthaomy/PolyFTSGibbsWrapper

    def GetOperatorStats(self, RunPath_):
        """Collect operator statistics from ``operators.dat`` in a run directory.

        TODO: Generalize to MD.

        Args:
            RunPath_: Directory that contains the ``operators.dat`` file.

        Returns:
            A list of ``[value, error, nsamples]`` entries, one per operator
            column that was processed.

            * ``JobType == 'CL'`` and ``Program == 'polyFTS'``: every second
              column of the ``Nspecies * 2 + 4`` columns is passed through
              ``stats.autoWarmupMSER`` and ``stats.doStats``. Processing stops
              at the first column for which warmup detection fails.
            * ``JobType == 'SCFT'`` and ``Program == 'polyFTS'``: the values
              are read from the last row of the file, reported with an error
              of ``0.`` and a sample count of ``1``.
            * Any other combination: an empty list.
        """
        operator_statistics = []
        operators_path = os.path.join(RunPath_, 'operators.dat')

        if self.JobType == 'CL' and self.Program == 'polyFTS':
            number_columns = self.Nspecies * 2 + 4
            # The context manager guarantees the handle is released even if
            # doStats raises.
            with open(operators_path, 'r') as datafile:
                # Skip step column and imaginary columns
                for c in range(0, number_columns, 2):
                    # Rewind so every column is parsed from the top of the
                    # file; harmless if the stats helper rewinds on its own
                    # (its implementation is not visible here).
                    datafile.seek(0)
                    try:
                        warmup, Data, _nwarmup = stats.autoWarmupMSER(
                            datafile, c + 1)
                    except Exception:
                        # Best effort: stop at the first column that cannot be
                        # analysed and keep what was collected so far.
                        break
                    (nsamples, _minmax, Val, Valerr, _kappa, _unbiasedvar,
                     _autocor) = stats.doStats(warmup, Data, False)
                    operator_statistics.append([Val, Valerr, nsamples])

        elif self.JobType == 'SCFT' and self.Program == 'polyFTS':
            number_columns = self.Nspecies + 2
            # ndmin=2 keeps a single-row file two-dimensional so that [-1]
            # always yields a row rather than a scalar.
            data = np.loadtxt(operators_path, ndmin=2)[-1]
            for c in range(number_columns):
                operator_statistics.append([data[c + 1], 0., 1])

        return operator_statistics

Example #2

Show file

        runfile.close()
    #call(["PolyFTSGPU.x","run.in"])
    call('{} run.in > run.out'.format(FTS), shell=True)

    # Data analysis:

    # Partition 1
    #datafile = open('model1_operators.dat','r')
    datafile = open('operators.dat','r')
    # ReP1
    try:
        warmup, Data, nwarmup = stats.autoWarmupMSER(datafile,3)
    except:
        print("Failed on ReP1")
        break
    (nsamples,(min,max),P1,P1err,kappa,unbiasedvar,autocor)=stats.doStats(warmup,Data,False)
    datafile.seek(0)
    # ImP1
    try:
        warmup, Data, nwarmup = stats.autoWarmupMSER(datafile,4)
    except:
        print("Failed on ImP1")
        break
    (nsamples,(min,max),imP1,imP1err,kappa,unbiasedvar,autocor)=stats.doStats(warmup,Data,False)
    datafile.seek(0)
    # mu1
    try:
        warmup, Data, nwarmup = stats.autoWarmupMSER(datafile,5)
    except:
        print("Failed on Re mu1")
        break

Example #3

Show file

File: analysis.py Project: EqualAPriori/SCOUTff

def _rgree_column_stats(path, column, autowarmup, nwarmup):
    """Strip warmup from one column of a data file and run ``stats.doStats``.

    Returns ``(summary, Data, nwarmup)`` where ``summary`` is a dict of the
    doStats outputs, ``Data`` holds the samples thinned by the correlation
    time, and ``nwarmup`` is the (possibly auto-detected) warmup length.
    """
    with open(path, 'r') as datafile:
        if autowarmup:
            warmup, Data, nwarmup = stats.autoWarmupMSER(datafile, column)
        else:
            warmup, Data = stats.extractData(datafile, column, nwarmup)
        (nsamples, (data_min, data_max), mean, semcc, kappa, unbiasedvar,
         _autocor) = stats.doStats(
             warmup, Data, False, False,
             '_{0}_mol{1}'.format(datafile.name, column))
    # get decorrelated samples
    Data = Data[::int(np.max([1., kappa]))]
    summary = {
        'nsamples': nsamples,
        'min': data_min,
        'max': data_max,
        'mean': mean,
        'semcc': semcc,
        'kappa': kappa,
        'unbiasedvar': unbiasedvar,
    }
    return summary, Data, nwarmup


def _rgree_report(title, summary, Data):
    """Format the per-molecule statistics paragraph written to the report."""
    lines = ""
    lines += '\n==== {} ===='.format(title)
    lines += "\n  - Mean                    = {} +/- {}".format(
        summary['mean'], summary['semcc'])
    lines += "\n  - Equilibrated samples    = {}".format(summary['nsamples'])
    lines += "\n  - Correlation time        = {}".format(summary['kappa'])
    lines += "\n  - Effective # samples     = {}".format(
        summary['nsamples'] / summary['kappa'])
    lines += "\n  - Reduced-bias variance   = {}".format(
        summary['unbiasedvar'])
    # note that there is no unbiased estimator for the population standard
    # deviation. We can use sqrt(var) as a indicative estimator.
    # ddof is correction to 1/N...using ddof=1 returns regular reduced-bias
    # estimator
    lines += "\n  - S.D. (unbiased, biased) = {} {}".format(
        np.sqrt(summary['unbiasedvar']), np.std(Data, ddof=0))
    lines += "\n  - Min, Max                = {} {}\n".format(
        summary['min'], summary['max'])
    return lines


def _rgree_stats_row(summary):
    """Return ``[Avg, Std, CorrTime, Err, NUncorrSamples]`` for one series."""
    return [
        summary['mean'],
        np.sqrt(summary['unbiasedvar']),
        summary['kappa'],
        summary['semcc'],
        summary['nsamples'] / summary['kappa'],
    ]


def GetRgRee(traj,
             DOP,
             NP,
             NAtomsPerChain=None,
             plotDir='RgRee_plots',
             RgDatName='RgTimeSeries',
             ReeDatName='ReeTimeSeries',
             RgStatOutName='RgReeStats',
             Ext='.dat',
             res0Id=0,
             autowarmup=True,
             nwarmup=100,
             plot=False):
    """Compute radius of gyration (Rg) and end-to-end distance (Ree) per chain.

    For every chain the Rg and Ree time series are computed, written to disk,
    and their squares are analysed with ``stats.doStats``. The thinned squared
    samples of all chains are then pooled to obtain RMS values.

    Note: multiply coordinates by 10 if the input traj was generated by lammps
    and the unit is nonDim.

    Args:
        traj: Trajectory providing ``n_frames``, ``topology`` and
            ``atom_slice`` (presumably an ``mdtraj.Trajectory`` -- confirm
            against the caller).
        DOP: Number of residues per chain (used for all-atom systems).
        NP: Number of chains.
        NAtomsPerChain: Used if running a CG system; if provided, chains are
            built from consecutive blocks of this many atoms and one residue
            per chain is assumed.
        plotDir: Directory for the PNG plots (only used when ``plot``).
        RgDatName: Base name of the Rg time-series file.
        ReeDatName: Base name of the Ree time-series file.
        RgStatOutName: Base name of the text report.
        Ext: Extension appended to every data/report file name.
        res0Id: Index of the first residue of the first chain.
        autowarmup: If true, detect the warmup length with
            ``stats.autoWarmupMSER``; otherwise discard ``nwarmup`` samples via
            ``stats.extractData``.
        nwarmup: Warmup length used when ``autowarmup`` is false.
        plot: If true, save one Rg and one Ree plot per chain.

    Returns:
        The 19-tuple ``(RgRMS, ReeRMS, RgRMSErr, ReeRMSErr, RgRMSCorrTime,
        RgRMSCorrTimeErr, RgRMSNUncorrSamples, ReeRMSCorrTime,
        ReeRMSCorrTimeErr, ReeRMSNUncorrSamples, RgRMSStd, ReeRMSStd, RgRMS_b,
        RgRMSErr_b, RgRMSStd_b, RgRMSCorrTime_b, RgRMSCorrTimeErr_b,
        RgRMSNUncorrSamples_b, BlockResName)``. The ``*_b`` lists and
        ``BlockResName`` are empty unless a diblock was detected.

    Side effects:
        Writes ``RgDatName + Ext``, ``'RgSqTimeSeries' + Ext``,
        ``ReeDatName + Ext``, ``'ReeSqTimeSeries' + Ext`` and
        ``RgStatOutName + Ext`` in the working directory, uses a temporary
        ``tmp.dat`` for block statistics, and prints progress to stdout.
    """
    ElementDictionary = {
        "carbon": 12.01,
        "hydrogen": 1.008,
        "oxygen": 16.00,
        "nitrogen": 14.001,
        "virtual site": 1.0,
        "sodium": 23.0,
        "chloride": 35.5
    }

    def _masses(indices):
        # Elements missing from the table fall back to unit mass.
        return np.array([
            ElementDictionary.get(str(traj.topology.atom(index).element), 1.)
            for index in indices
        ])

    if plot:
        try:
            os.mkdir(plotDir)
        except OSError:
            # Best effort: the directory most likely exists already.
            pass
        print('...Rg and Ree plots will be saved in {}...\n'.format(plotDir))
    RgTimeseries = [range(traj.n_frames)]
    Rgheader = "Frame   "

    RgSqStats = []
    RgSqTimeseries = [range(traj.n_frames)]
    RgSqheader = "Frame   "
    RgSqList = []

    txtRg = ""

    ReeTimeseries = [range(traj.n_frames)]
    Reeheader = "Frame   "

    ReeSqStats = []
    ReeSqTimeseries = [range(traj.n_frames)]
    ReeSqheader = "Frame   "
    ReeSqList = []

    #get indices of residues in all chains
    MoleculeResidueList = []
    BlockResName = []
    # Defined up front so the name is bound even when no chain is examined.
    block = False
    if not NAtomsPerChain:
        #number residues per chain = DOP (for AA systems)
        for j in range(NP):
            resId = range(res0Id + j * DOP, res0Id + (j + 1) * DOP)
            MoleculeResidueList.append(resId)
            resname = []
            for res in traj.topology.residues:
                if res.index in resId:
                    resname.append(res.name)
            #check if diblock
            if j == 0:
                resname1 = resname[0]
                resname2 = resname[-1]
                i1 = np.where(np.array(resname) == resname1)[0]
                i2 = np.where(np.array(resname) == resname2)[0]
                # NOTE(review): i1/i2 are positions within this chain while
                # resId holds topology residue indices, so this test can only
                # succeed when res0Id == 0 -- confirm whether that is intended.
                if np.min(i1) == np.min(resId) and int(
                        np.min(i2) -
                        np.max(i1)) == 1 and np.max(i2) == np.max(resId):
                    block = True
                    BlockResName = [resname1, resname2]
                    RgSqList_b = [[], []]
                    RgSqStats_b = [[], []]
                    print(
                        'Detect diblock:\n block 1: {} {}-mer, block 2: {} {}-mer'
                        .format(resname1, len(i1), resname2, len(i2)))
                else:
                    block = False
    else:
        #1 residue per chain (for CG system)
        a0Id = [atom.index for atom in traj.topology.residue(res0Id).atoms]
        a0Id = np.min(a0Id)
        for i in range(NP):
            atomId_per_chain = range(
                a0Id + i * NAtomsPerChain,
                a0Id + i * NAtomsPerChain + NAtomsPerChain)
            resId_tmp = [
                traj.topology.atom(aId).residue.index
                for aId in atomId_per_chain
            ]
            MoleculeResidueList.append(np.unique(resId_tmp))
        block = False

    for j, resId in enumerate(MoleculeResidueList):
        resIdLow = np.min(resId)
        resIdUp = np.max(resId)
        atom_indices = traj.topology.select('resid {} to {}'.format(
            resIdLow, resIdUp))
        if block:
            atom_indices_b = []
            mass_list_b = []
            for resname in BlockResName:
                ii = traj.topology.select(
                    "resid {} to {} and resname '{}'".format(
                        resIdLow, resIdUp, resname))
                atom_indices_b.append(ii)
                mass_list_b.append(_masses(ii))
        mass_list = _masses(atom_indices)
        if j == 0:
            print('Indices of atoms in chain {} \n{}'.format(
                j + 1, atom_indices))
            print('Mass of atoms in a chain {}'.format(mass_list))
        print('Evaluate Rg and Ree of chain {}/{}'.format(
            j + 1, len(MoleculeResidueList)))

        # === Compute Rg ===
        Rg = md.compute_rg(traj.atom_slice(atom_indices), masses=mass_list)
        RgTimeseries.append(Rg.tolist())
        Rgheader += 'Rg{}   '.format(j + 1)
        np.savetxt(RgDatName + Ext,
                   np.transpose(RgTimeseries),
                   fmt='%5.5f',
                   header=Rgheader)

        RgSq = Rg**2.
        RgSqTimeseries.append(RgSq.tolist())
        # The squared-series label belongs to the squared-series header; the
        # plain Rg header must not accumulate it.
        RgSqheader += 'Rg{}^2   '.format(j + 1)
        np.savetxt('RgSqTimeSeries' + Ext,
                   np.transpose(RgSqTimeseries),
                   fmt='%5.5f',
                   header=RgSqheader)

        #do stats on Rg^2
        summary, Data, nwarmup = _rgree_column_stats(
            'RgSqTimeSeries' + Ext, j + 1, autowarmup, nwarmup)
        RgSqList.extend(Data)
        txtRg += _rgree_report('Rg^2 for molecule {}'.format(j + 1), summary,
                               Data)
        RgSqStats.append(_rgree_stats_row(summary))

        # Plot Rg
        if plot:
            plt.axvspan(0, nwarmup, alpha=0.5, color='#6495ED')
            plt.plot(Rg, "k-")
            plt.xlim(0)
            plt.xlabel('timestep')
            plt.ylabel('Radius-of-gryation')
            plt.savefig("{}/Rg{}.png".format(plotDir, j + 1),
                        bbox_inches='tight')
            plt.close()

        # Rg of blocks
        if block:
            Rg_b = np.array([
                md.compute_rg(traj.atom_slice(ai), masses=mass_list_b[i])
                for i, ai in enumerate(atom_indices_b)
            ])
            RgSq_b = Rg_b**2.
            for i, RgSq_block in enumerate(RgSq_b):
                data = [range(0, len(RgSq_block)), RgSq_block.tolist()]
                np.savetxt('tmp.dat', np.transpose(data), fmt='%5.5f')
                #do stats on Rg^2
                summary, Data, nwarmup = _rgree_column_stats(
                    'tmp.dat', 1, autowarmup, nwarmup)
                RgSqList_b[i].extend(Data)
                RgSqStats_b[i].append(_rgree_stats_row(summary))
            os.remove("tmp.dat")

        # === Compute Ree ===
        atom_pairs = [np.min(atom_indices), np.max(atom_indices)]
        Ree = md.compute_distances(traj,
                                   atom_pairs=[atom_pairs],
                                   periodic=False,
                                   opt=True)
        Ree = [a[0] for a in Ree.tolist()]
        ReeTimeseries.append(Ree)
        Reeheader += 'Ree{}   '.format(j + 1)
        np.savetxt(ReeDatName + Ext,
                   np.transpose(ReeTimeseries),
                   fmt='%5.5f',
                   header=Reeheader)

        ReeSq = np.array(Ree)**2.
        ReeSqTimeseries.append(ReeSq.tolist())
        # Same header fix as for Rg^2 above.
        ReeSqheader += 'Ree{}^2   '.format(j + 1)
        np.savetxt('ReeSqTimeSeries' + Ext,
                   np.transpose(ReeSqTimeseries),
                   fmt='%5.5f',
                   header=ReeSqheader)

        #do stats on Ree^2
        summary, Data, nwarmup = _rgree_column_stats(
            'ReeSqTimeSeries' + Ext, j + 1, autowarmup, nwarmup)
        ReeSqList.extend(Data)
        txtRg += _rgree_report('Ree^2 for molecule {}'.format(j + 1), summary,
                               Data)
        ReeSqStats.append(_rgree_stats_row(summary))

        # Plot Ree
        if plot:
            plt.axvspan(0, nwarmup, alpha=0.5, color='#6495ED')
            plt.plot(Ree, "k-")
            plt.xlim(0)
            plt.xlabel('timestep')
            plt.ylabel('End-to-end distance')
            plt.savefig("{}/Ree{}.png".format(plotDir, j + 1),
                        bbox_inches='tight')
            plt.close()

    # get RMS Rg and Ree
    RgSqList = np.array(RgSqList)
    RgRMS = np.sqrt(np.mean(RgSqList))
    RgSqErr = scipy.stats.sem(RgSqList)
    RgRMSErr = 1. / 2. * RgSqErr / RgRMS  # propagate SEM of Rg^2 to Rg
    RgSqStd = np.std(RgSqList, ddof=1)
    RgRMSStd = 1. / 2. * RgSqStd / RgRMS  # propagate Std of Rg^2 to Rg
    RgSqStats = np.array(RgSqStats)
    RgRMSCorrTime = np.mean(RgSqStats[:, 2])
    RgRMSCorrTimeErr = np.sqrt(np.var(RgSqStats[:, 2]) / len(RgSqStats[:, 2]))
    RgRMSNUncorrSamples = np.mean(RgSqStats[:, 4])

    #Rg of blocks
    RgRMS_b = []
    RgRMSErr_b = []
    RgRMSStd_b = []
    RgRMSCorrTime_b = []
    RgRMSCorrTimeErr_b = []
    RgRMSNUncorrSamples_b = []
    if block:
        for i, resname in enumerate(BlockResName):
            # Dedicated names so the whole-chain arrays above are not clobbered.
            block_sq = np.array(RgSqList_b[i])
            block_stats = np.array(RgSqStats_b[i])
            RgRMS_b.append(np.sqrt(np.mean(block_sq)))
            RgRMSErr_b.append(1. / 2. * scipy.stats.sem(block_sq) / RgRMS_b[i])
            RgRMSStd_b.append(
                1. / 2. * np.std(block_sq, ddof=1) / RgRMS_b[i])
            RgRMSCorrTime_b.append(np.mean(block_stats[:, 2]))
            RgRMSCorrTimeErr_b.append(
                np.sqrt(np.var(block_stats[:, 2]) / len(block_stats[:, 2])))
            RgRMSNUncorrSamples_b.append(np.mean(block_stats[:, 4]))

    #Ree
    ReeSqList = np.array(ReeSqList)
    ReeRMS = np.sqrt(np.mean(ReeSqList))
    ReeSqErr = scipy.stats.sem(ReeSqList)
    ReeRMSErr = 1. / 2. * ReeSqErr / ReeRMS
    ReeSqStd = np.std(ReeSqList, ddof=1)
    ReeRMSStd = 1. / 2. * ReeSqStd / ReeRMS
    ReeSqStats = np.array(ReeSqStats)
    ReeRMSCorrTime = np.mean(ReeSqStats[:, 2])
    ReeRMSCorrTimeErr = np.sqrt(
        np.var(ReeSqStats[:, 2]) / len(ReeSqStats[:, 2]))
    ReeRMSNUncorrSamples = np.mean(ReeSqStats[:, 4])

    lines = ""
    lines += '\n\n====================='
    lines += '\n\nRMS of Rg is: {0:2.4f} +/- {1:2.5f}'.format(RgRMS, RgRMSErr)
    lines += '\nRMS Rg correlation time: {0:5.4f} +/- {1:5.6f}'.format(
        RgRMSCorrTime, RgRMSCorrTimeErr)
    lines += '\n\nRMS of Ree is: {0:2.4f} +/- {1:2.5f}'.format(
        ReeRMS, ReeRMSErr)
    lines += '\nRMS Ree correlation time: {0:5.4f} +/- {1:5.6f}'.format(
        ReeRMSCorrTime, ReeRMSCorrTimeErr)
    if block:
        for i, resname in enumerate(BlockResName):
            lines += '\n\nRMS of Rg for block %i-%s is: %2.4f +/- %2.5f' % (
                i + 1, resname, RgRMS_b[i], RgRMSErr_b[i])
            lines += '\nRMS Rg correlation time: {0:5.4f} +/- {1:5.6f}'.format(
                RgRMSCorrTime_b[i], RgRMSCorrTimeErr_b[i])
    print(lines + '\n')
    txtRg += lines
    # Close the report explicitly so it is flushed before returning.
    with open(RgStatOutName + Ext, 'w') as f:
        f.write(txtRg)
    return RgRMS, ReeRMS, RgRMSErr, ReeRMSErr, RgRMSCorrTime, RgRMSCorrTimeErr, RgRMSNUncorrSamples, ReeRMSCorrTime, ReeRMSCorrTimeErr, ReeRMSNUncorrSamples, RgRMSStd, ReeRMSStd, RgRMS_b, RgRMSErr_b, RgRMSStd_b, RgRMSCorrTime_b, RgRMSCorrTimeErr_b, RgRMSNUncorrSamples_b, BlockResName

Example #4

Show file

File: analysis.py Project: EqualAPriori/SCOUTff

def GetThermo(ThermoLog,
              fi='lammps',
              obs=None,
              cols=None,
              autowarmup=True,
              nwarmup=100,
              plot=False,
              plotDir='Thermo_plots'):
    """Compute time-series statistics for selected columns of a thermo log.

    Args:
        ThermoLog: Path of the log file to analyse.
        fi: Log file format, 'lammps' or 'openmm'. The log is first converted
            with the matching ``log2txt`` routine; any other value uses
            ``ThermoLog`` as it is.
        obs: Observable names to analyse. They are matched against the names
            on the ``#`` header line of the converted log. Mutually exclusive
            with ``cols``.
        cols: Column indices to analyse. Mutually exclusive with ``obs``.
        autowarmup: If true, detect the warmup length with
            ``stats.autoWarmupMSER``; otherwise discard ``nwarmup`` samples via
            ``stats.extractData``.
        nwarmup: Warmup length used when ``autowarmup`` is false.
        plot: If true, save one PNG per observable in ``plotDir``.
        plotDir: Directory that receives the plots.

    Returns:
        ``(obsID, Stats)``: the observable labels and, for each of them, a
        list ``[Avg, Std, CorrTime, Err, NUncorrSamples]``. Columns without a
        known name are labelled ``'col<index>'``.

    Raises:
        ValueError: If both or neither of ``obs`` and ``cols`` are given, or
            if ``obs`` is given but the log has no ``#`` header line.
    """
    if obs is not None and cols is not None:
        raise ValueError(
            'Read data either by observable name or column index but not both!'
        )
    if obs is None and cols is None:
        raise ValueError(
            'Provide either observable names (obs) or column indices (cols).')

    if plot:
        try:
            os.mkdir(plotDir)
        except OSError:
            # Best effort: the directory most likely exists already.
            pass
        print('...Thermo plots will be saved in {}...\n'.format(plotDir))

    # convert log file:
    if fi == 'openmm':
        ThermoLog = log2txt.log2txt_openmm([ThermoLog])[0]
    elif fi == 'lammps':
        section = 'PRODUCTION RUNS'
        ThermoLog = log2txt.log2txt_lammps([ThermoLog], section,
                                           'production')[0]

    print('new log file: {}'.format(ThermoLog))
    obsID = []
    Stats = []
    # Stays empty when columns are selected by index, in which case every
    # column gets the generic 'col<index>' label below.
    obsNames = []
    #do stats
    with open(ThermoLog, 'r') as datafile:
        if obs is not None:
            # Single pass over the file; if several header lines exist the
            # last one wins.
            for line in datafile:
                if line.startswith('#'):
                    obsNames = line.split()[1:]
                    print('obsNames {}'.format(obsNames))
                    cols = [
                        obsNames.index(val) for val in obsNames if val in obs
                    ]
            if not isinstance(cols, list):
                # Without this check a header-less log would never terminate.
                raise ValueError(
                    "No '#' header line found in {}; cannot resolve "
                    "observable names".format(ThermoLog))
        print('cols {}'.format(cols))
        for col in cols:
            # Rewind so every column is parsed from the top of the file (the
            # header scan above leaves the handle at EOF); harmless if the
            # stats helper rewinds on its own.
            datafile.seek(0)
            if autowarmup:
                warmup, Data, nwarmup = stats.autoWarmupMSER(datafile, col)
                print("Auto warmup detection with MSER-5 => ", nwarmup)
            else:
                warmup, Data = stats.extractData(datafile, col, nwarmup)
            (nsamples, (data_min, data_max), mean, semcc, kappa, unbiasedvar,
             _autocor) = stats.doStats(
                 warmup, Data, False, False,
                 '_{0}_mol{1}'.format(datafile.name, col))
            try:
                obsName = obsNames[col]
            except IndexError:
                obsName = 'col{}'.format(col)
            lines = ""
            lines += '\n==== {} ===='.format(obsName)
            lines += "\n  - Mean                    = {} +/- {}".format(
                mean, semcc)
            lines += "\n  - Equilibrated samples    = {}".format(nsamples)
            lines += "\n  - Correlation time        = {}".format(kappa)
            lines += "\n  - Effective # samples     = {}".format(
                nsamples / kappa)
            lines += "\n  - Reduced-bias variance   = {}".format(unbiasedvar)
            # note that there is no unbiased estimator for the population
            # standard deviation. We can use sqrt(var) as a indicative
            # estimator. ddof is correction to 1/N...using ddof=1 returns
            # regular reduced-bias estimator
            lines += "\n  - S.D. (unbiased, biased) = {} {}".format(
                np.sqrt(unbiasedvar), np.std(Data, ddof=0))
            lines += "\n  - Min, Max                = {} {}\n".format(
                data_min, data_max)

            # Plot
            if plot:
                plt.axvspan(0, nwarmup, alpha=0.5, color='#6495ED')
                plt.plot(np.hstack((warmup, Data)))
                plt.xlim(0)
                plt.xlabel('timestep')
                plt.ylabel(obsName)
                plt.savefig("{}/{}.png".format(plotDir, obsName),
                            bbox_inches='tight')
                plt.close()

            print(lines)

            Avg = mean
            Std = np.sqrt(unbiasedvar)
            Err = semcc
            CorrTime = kappa
            NUncorrSamples = nsamples / kappa
            Stats.append([Avg, Std, CorrTime, Err, NUncorrSamples])
            obsID.append(obsName)

    return obsID, Stats

Example #5

Show file

        runfile.close()
    call('{} run.in > run.out'.format(fts), shell=True)

    # Data analysis:

    # Partition 1
    #datafile = open('model1_operators.dat','r')
    datafile = open('operators.dat', 'r')
    # ReP1
    try:
        warmup, Data, nwarmup = stats.autoWarmupMSER(datafile, 3)
    except:
        print("Failed on ReP1")
        break
    (nsamples, (min, max), P1, P1err, kappa, unbiasedvar,
     autocor) = stats.doStats(warmup, Data, False)
    datafile.seek(0)
    # ImP1
    try:
        warmup, Data, nwarmup = stats.autoWarmupMSER(datafile, 4)
    except:
        print("Failed on ImP1")
        break
    (nsamples, (min, max), imP1, imP1err, kappa, unbiasedvar,
     autocor) = stats.doStats(warmup, Data, False)
    datafile.seek(0)

    #mu and immu
    mus = np.zeros(len(xs))
    muerrs = np.zeros(len(xs))
    immus = np.zeros(len(xs))