Python textfile Examples, molSimplify.job_manager.classes.textfile Python Examples

Example #1

0

Show file

def list_active_jobs(ids=False, home_directory=False, parse_bundles=False):
    """Return the names of the jobs reported by ``qstat -r``.

    Args:
        ids: When truthy, also return the queue id of every job.
        home_directory: Directory containing the ``bundle`` folder. The
            string ``'in place'`` selects the current working directory.
            Must be set when ``parse_bundles`` is used.
        parse_bundles: When truthy and ``<home_directory>/bundle/bundle_id``
            exists, the lines of each matching bundle's ``*_info`` file are
            appended to the returned names.

    Returns:
        A list of job names, or a ``(names, job_ids)`` tuple when ``ids`` is
        truthy.

    Raises:
        Exception: If incompatible options are combined, or if the number of
            job names and job ids found in the report differ.
    """
    if (ids and parse_bundles) or (parse_bundles and not home_directory):
        raise Exception('Incompatible options passed to list_active_jobs()')
    if home_directory == 'in place':
        home_directory = os.getcwd()

    job_report = textfile()
    try:
        job_report.lines = call_bash("qstat -r")
    except Exception:
        # Best effort: a failing qstat call is treated as "no active jobs".
        # Unlike a bare ``except``, this lets KeyboardInterrupt/SystemExit
        # propagate.
        job_report.lines = []

    names = job_report.wordgrab('jobname:', 2)[0]
    names = [name for name in names if name]  # filters out NoneTypes

    if ids:
        jobname_line_indices = job_report.wordgrab('jobname:',
                                                   2,
                                                   matching_index=True)[0]
        # Drops None placeholders (and index 0, which has no preceding line
        # that could hold a job id).
        jobname_line_indices = [i for i in jobname_line_indices if i]
        # The job id is the first field of the line just above 'jobname:'.
        job_ids = [
            int(job_report.lines[line_index - 1].split()[0])
            for line_index in jobname_line_indices
        ]
        if len(names) != len(job_ids):
            print((len(names)))
            print((len(job_ids)))
            raise Exception('An error has occurred in listing active jobs!')
        return names, job_ids

    # The path is only built when parse_bundles is set, because
    # home_directory may still be False otherwise.
    if parse_bundles and os.path.isfile(
            os.path.join(home_directory, 'bundle', 'bundle_id')):

        bundle_id_path = os.path.join(home_directory, 'bundle', 'bundle_id')
        with open(bundle_id_path, 'r') as fil:
            # NOTE(review): the first line is used verbatim, including any
            # trailing newline -- confirm how bundle_id is written.
            identifier = fil.readlines()[0]

        # Every job name ending with the identifier is treated as a bundle;
        # the name without its last '_'-separated field is the bundle folder.
        bundles = [
            name.rsplit('_', 1)[0] for name in names
            if name.endswith(identifier)
        ]
        names = [name for name in names if name not in bundles]

        for bundle in bundles:
            info_path = glob.glob(
                os.path.join(home_directory, 'bundle', bundle, '*_info'))[0]
            with open(info_path, 'r') as fil:
                # One entry per line of the info file, newline removed.
                names.extend(line[:-1] if line.endswith('\n') else line
                             for line in fil)

    return names

Example #2

0

Show file

def read_charges(PATH):
    """Read the charge table that belongs to a job.

    Args:
        PATH: Path to either the job's ``.out`` file or directly to the
            ``charge_mull.xls`` file. For a ``.out`` path, the file
            ``scr/charge_mull.xls`` next to the outfile is read instead.

    Returns:
        A list with one string per line of the charge file, made of the
        second and third whitespace-separated fields joined by a space
        (presumably element and charge -- confirm against the file format).
        An empty list is returned if the file cannot be read or parsed.
    """
    PATH = convert_to_absolute_path(PATH)
    if PATH.endswith('.out'):
        PATH = os.path.join(os.path.split(PATH)[0], 'scr', 'charge_mull.xls')
    try:
        charge_mull = textfile(PATH)
        rows = (line.split() for line in charge_mull.lines)
        return [fields[1] + ' ' + fields[2] for fields in rows]
    except Exception:
        # Deliberate best effort: a missing or malformed file yields no
        # charges. KeyboardInterrupt/SystemExit are no longer swallowed.
        return []

Example #3

0

Show file

def jobmanager2mAD(job, active_jobs, dbname=False, gene=False):
    """Process the outfile of one job unless that job is still active.

    Args:
        job: Sequence whose first two items are the base directory and the
            job name; the outfile is ``<basedir>/<jobname>.out``.
        active_jobs: Container of active job names. The outfile path without
            its last '_'-separated field, reduced to its final path
            component, is looked up in it.
        dbname: Passed through to ``common_processing``.
        gene: Passed through to ``common_processing``.

    Returns:
        ``False`` when the job is skipped or its spin multiplicity cannot be
        read; otherwise the run object produced by the processing functions.
    """
    this_run = False
    basedir, jobname = job[0], job[1]
    outfile = basedir + '/' + jobname + '.out'

    job_root = os.path.split(outfile.rsplit('_', 1)[0])[-1]
    # Active jobs are skipped, except that 'nohup' outfiles are always read.
    if job_root in active_jobs and 'nohup' not in outfile:
        return this_run

    output = textfile(outfile)
    try:
        spin = int(output.wordgrab(['Spin multiplicity:'], -1)[0][0])
    except Exception:
        # Narrowed from a bare ``except`` so that KeyboardInterrupt and
        # SystemExit still propagate.
        print(('Cannot read file: ', outfile))
        return this_run

    this_run = common_processing(jobname, basedir, output, outfile, spin,
                                 dbname=dbname, gene=gene)
    if isSP(outfile):
        return process_single_points(this_run, basedir, output)

    this_run = process_geometry_optimizations(this_run, basedir, outfile,
                                              output)
    # Iterate over a snapshot of the keys, as the original did.
    for key in list(associated_jobs.keys()):
        associated_jobs[key](this_run, jobname, basedir)
    return this_run

Example #4

0

Show file

def read_mullpop(PATH):
    """Read the Mulliken populations that belong to a job.

    Args:
        PATH: Path to either the job's ``.out`` file or the ``mullpop`` file.
            For a ``.out`` path, ``scr/mullpop`` next to the outfile is read.

    Returns:
        A list of strings, one per table row, built from selected
        whitespace-separated fields of the row: fields 1 and 5 when the
        third row has six fields, otherwise fields 1, 5 and 9.
    """
    PATH = convert_to_absolute_path(PATH)
    stem_and_ext = PATH.rsplit('.', 1)
    if len(stem_and_ext) > 1 and stem_and_ext[1] == 'out':
        PATH = os.path.join(os.path.split(PATH)[0], 'scr', 'mullpop')

    mullpop = textfile(PATH)

    # If the file holds multiple frames, keep only the last one: it starts
    # two lines after the second-to-last separator line.
    separator_indices = mullpop.wordgrab(
        ['------------ ---------- ----------'], [1], matching_index=True)[0]
    if len(separator_indices) > 1:
        last_frame_start = separator_indices[-2] + 2
        mullpop.lines = mullpop.lines[last_frame_start:]

    rows = [line.split() for line in mullpop.lines]
    if len(rows[2]) == 6:
        return [row[1] + ' ' + row[5] for row in rows[1:-2]]
    return [row[1] + ' ' + row[5] + ' ' + row[9] for row in rows[2:-2]]

Example #5

0

Show file

File: manager_io.py Project: edgarin1st/molSimplify

def read_outfile(outfile_path, short_ouput=False, long_output=True):
    """Parse a TeraChem or ORCA output file into a dictionary of results.

    Args:
        outfile_path: Complete path to the outfile to be read, as a string.
        short_ouput: (sic) When truthy, the TeraChem branch only grabs the
            S-SQUARED values and the gradient-error flag instead of the full
            set of last-line quantities.
        long_output: When truthy, the TeraChem branch also parses the NBO
            natural-population table into ``orbital_occupation``.

    Returns:
        A dictionary with the keys name, charge, finalenergy, time,
        s_squared, s_squared_ideal, finished, min_energy, scf_error,
        thermo_grad_error, solvation_energy, optimization_cycles,
        thermo_vib_energy, thermo_vib_free_energy, thermo_suspect,
        orbital_occupation, oscillating_scf_error and outfile_path.
        When the file type is not recognized, a dictionary of placeholder
        values (without the outfile_path key) is returned instead.

    Raises:
        Exception: If the same orbital key occurs twice in the NBO table.
    """
    output = textfile(outfile_path)
    output_type = output.wordgrab(['TeraChem', 'ORCA'],
                                  ['whole_line', 'whole_line'])
    # Pick the first program whose keyword was found; after the loop,
    # `counter` indexes into ['TeraChem', 'ORCA'].
    for counter, match in enumerate(output_type):
        if match[0]:
            break
        if counter == 1:
            # Neither keyword matched. A few known file kinds are still
            # processed as TeraChem output (counter reset to 0).
            if 'nohup' in outfile_path:
                print('Warning! Nohup file caught in outfile processing')
                print(outfile_path)
                counter = 0
            elif 'smd.out' in outfile_path:
                print('Warning! SMD file caught in outfile processing')
                print(outfile_path)
                counter = 0
            # NOTE(review): `output_type` is still the wordgrab result here,
            # so `'ORCA' in output_type` tests membership in that result
            # rather than in the file text -- confirm this branch can be hit.
            elif ('atom' in outfile_path) and ('ORCA' in output_type):
                print('Density fitting output caught in outfile processing')
                print(outfile_path)
                counter = 0
            else:
                print('.out file type not recognized for file: ' +
                      outfile_path)
                # NOTE(review): unlike the regular return value, this
                # dictionary has no 'outfile_path' key.
                return_dict = {
                    'name': None,
                    'charge': None,
                    'finalenergy': None,
                    'time': None,
                    's_squared': None,
                    's_squared_ideal': None,
                    'finished': False,
                    'min_energy': None,
                    'scf_error': False,
                    'thermo_grad_error': False,
                    'solvation_energy': None,
                    'optimization_cycles': None,
                    'thermo_vib_energy': None,
                    'thermo_vib_free_energy': None,
                    'thermo_suspect': None,
                    'orbital_occupation': None,
                    'oscillating_scf_error': False
                }
                return return_dict

    output_type = ['TeraChem', 'ORCA'][counter]

    # Defaults for every reported quantity; each branch below overwrites the
    # ones it can extract.
    name = None
    finished = False
    charge = None
    finalenergy = None
    min_energy = None
    s_squared = None
    s_squared_ideal = None
    scf_error = False
    time = None
    thermo_grad_error = False
    implicit_solvation_energy = None
    geo_opt_cycles = None
    thermo_vib = None
    thermo_vib_f = None
    thermo_suspect = None
    orbital_occupation = None
    oscillating_scf_error = False

    # The job name is the outfile's base name without its extension.
    name = os.path.split(outfile_path)[-1]
    name = name.rsplit('.', 1)[0]
    if output_type == 'TeraChem':

        charge = output.wordgrab(['charge:'], [2], first_line=True)[0]
        if charge:
            charge = int(charge)
        if not short_ouput:
            # One keyword per quantity, paired position-by-position with the
            # index list that follows.
            (finalenergy, s_squared, s_squared_ideal, time, thermo_grad_error,
             implicit_solvation_energy, geo_opt_cycles, thermo_vib,
             thermo_vib_f, thermo_suspect) = output.wordgrab([
                 'FINAL', 'S-SQUARED:', 'S-SQUARED:', 'processing',
                 'Maximum component of gradient is too large',
                 'C-PCM contribution to final energy:', 'Optimization Cycle',
                 'Thermal vibrational energy',
                 'Thermal vibrational free energy',
                 'Thermochemical Analysis is Suspect'
             ], [2, 2, 4, 3, 0, 4, 3, 7, 10, 0],
                                                             last_line=True)
        if short_ouput:
            s_squared, s_squared_ideal, thermo_grad_error = output.wordgrab(
                [
                    'S-SQUARED:', 'S-SQUARED:',
                    'Maximum component of gradient is too large'
                ], [2, 4, 0],
                last_line=True)
        # Collapse the grabbed values / helper result into plain booleans.
        oscillating_scf = get_scf_progress(outfile_path)
        if oscillating_scf:
            oscillating_scf_error = True
        else:
            oscillating_scf_error = False
        if thermo_grad_error:
            thermo_grad_error = True
        else:
            thermo_grad_error = False
        if thermo_suspect:
            thermo_suspect = True
        else:
            thermo_suspect = False

        if s_squared_ideal:
            # The grabbed token may carry a closing parenthesis.
            s_squared_ideal = float(s_squared_ideal.strip(')'))
        if implicit_solvation_energy:
            implicit_solvation_energy = try_float(
                implicit_solvation_energy.split(':')[-1])

        min_energy = output.wordgrab('FINAL', 2, min_value=True)[0]

        # The job counts as finished when the last line containing
        # 'finished:' starts with the words 'Job finished:'.
        is_finished = output.wordgrab(['finished:'],
                                      'whole_line',
                                      last_line=True)[0]
        if is_finished:
            if is_finished[0] == 'Job' and is_finished[1] == 'finished:':
                finished = True

        # Collect the split text of every line mentioning 'DIIS'.
        is_scf_error = output.wordgrab('DIIS', 5, matching_index=True)[0]
        if is_scf_error[0]:
            is_scf_error = [output.lines[i].split() for i in is_scf_error]
        else:
            is_scf_error = []
        if type(is_scf_error) == list and len(is_scf_error) > 0:
            for scf in is_scf_error:
                if ('failed' in scf) and ('converge' in scf) and (
                        'iterations,' in scf) and ('ADIIS' in scf):
                    # Sixth word holds the iteration count, written as
                    # '<n>+...'; only the part before '+' is used.
                    scf = scf[5]
                    scf = int(scf.split('+')[0])
                    if scf > 5000:
                        scf_error = [True, scf]
        if long_output:
            # Line indices delimiting the NBO natural-population table.
            nbo_start, nbo_end = output.wordgrab([
                'NATURAL POPULATIONS:  Natural atomic orbital occupancies',
                'Summary of Natural Population Analysis:'
            ],
                                                 'whole_line',
                                                 matching_index=True,
                                                 first_line=True)
            if nbo_start and nbo_end:
                nbo_lines = output.lines[nbo_start:nbo_end]
                nbo_lines = [
                    line for line in nbo_lines if len(line.split()) > 0
                ]  # filter out empty lines
                nbo_lines = [
                    line for line in nbo_lines if line.split()[0].isdigit()
                ]  # filter only results lines
                nbo_lines = [
                    line for line in nbo_lines if line.split()[4] == 'Val('
                ]  # filter only valence orbitals

                if len(nbo_lines) > 0:
                    orbital_occupation = dict()
                    for line in nbo_lines:
                        # Key is built from the 2nd, 3rd and 4th fields of
                        # the row, joined by underscores.
                        key = line.split()[1] + '_' + line.split(
                        )[2] + '_' + line.split()[3]
                        if key in orbital_occupation.keys():
                            raise Exception(
                                outfile_path + ' ' + key +
                                ': Same key found twice in nbo parsing!')
                        if len(line.split()) > 8:  # for open shell systems
                            orbital_occupation[key] = [
                                float(line.split()[-3]),
                                float(line.split()[-1])
                            ]
                        else:  # For closed shell systems
                            orbital_occupation[key] = [
                                float(line.split()[-2]),
                                float(0)
                            ]

    if output_type == 'ORCA':
        finished, finalenergy, s_squared, s_squared_ideal, implicit_solvation_energy = output.wordgrab(
            [
                '****ORCA TERMINATED NORMALLY****', 'FINAL', '<S**2>',
                'S*(S+1)', 'CPCM Dielectric    :'
            ], [0, -1, -1, -1, 3],
            last_line=True)
        if finished == '****ORCA':
            finished = True

        timekey = output.wordgrab('TOTAL RUN TIME:',
                                  'whole_line',
                                  last_line=True)[0]
        if type(timekey) == list:
            # Convert days, hours, minutes, seconds and milliseconds from the
            # split 'TOTAL RUN TIME:' line into seconds.
            time = (float(timekey[3]) * 24 * 60 * 60 +
                    float(timekey[5]) * 60 * 60 + float(timekey[7]) * 60 +
                    float(timekey[9]) + float(timekey[11]) * 0.001)

        if finished:
            charge = output.wordgrab(['Total Charge'], [-1], last_line=True)[0]
            charge = int(
                round(charge, 0)
            )  # Round to nearest integer value (it should always be very close)

        # One energy per 'FINAL SINGLE POINT ENERGY' line: their count is
        # reported as the number of optimization cycles, their minimum as
        # min_energy.
        opt_energies = output.wordgrab('FINAL SINGLE POINT ENERGY', -1)[0]
        geo_opt_cycles, min_energy = len(opt_energies), min(opt_energies)

    return_dict = {}
    return_dict['name'] = name
    return_dict['charge'] = charge
    return_dict['finalenergy'] = try_float(finalenergy)
    return_dict['time'] = try_float(time)
    return_dict['s_squared'] = try_float(s_squared)
    return_dict['s_squared_ideal'] = try_float(s_squared_ideal)
    return_dict['finished'] = finished
    return_dict['min_energy'] = try_float(min_energy)
    return_dict['scf_error'] = scf_error
    return_dict['thermo_grad_error'] = thermo_grad_error
    return_dict['solvation_energy'] = implicit_solvation_energy
    return_dict['optimization_cycles'] = geo_opt_cycles
    return_dict['thermo_vib_energy'] = try_float(thermo_vib)
    return_dict['thermo_vib_free_energy'] = try_float(thermo_vib_f)
    return_dict['thermo_suspect'] = thermo_suspect
    return_dict['orbital_occupation'] = orbital_occupation
    return_dict['oscillating_scf_error'] = oscillating_scf_error
    return_dict['outfile_path'] = outfile_path
    return return_dict

Example #6

0

Show file

File: manager_io.py Project: edgarin1st/molSimplify

def read_infile(outfile_path):
    """Read the job settings from the infile that belongs to a job.

    Args:
        outfile_path: Path to either the outfile or the infile of a job.
            The extension is dropped and ``.in`` is appended to locate the
            infile.

    Returns:
        A dictionary with the keys name, charge, spinmult, solvent,
        run_type, levelshifta, levelshiftb, method, hfx, basis,
        convergence_thresholds, multibasis, constraints, dispersion,
        coordinates, guess and qm_code.

    Raises:
        Exception: For a TeraChem infile that specifies both a multibasis
            and constraints, or for an ORCA infile whose basis set/ECP
            combination is not recognized.
    """
    root = outfile_path.rsplit('.', 1)[0]
    unique_job_name = os.path.split(root)[-1]
    inp = textfile(root + '.in')
    # ORCA infiles are recognized by a '#ORCA' marker on their first line.
    qm_code = 'orca' if '#ORCA' in inp.lines[0] else 'terachem'

    if qm_code == 'terachem':
        (charge, spinmult, solvent, run_type, levelshifta, levelshiftb,
         method, hfx, basis, dispersion, coordinates, guess) = inp.wordgrab(
             [
                 'charge ', 'spinmult ', 'epsilon ', 'run ',
                 'levelshiftvala ', 'levelshiftvalb ', 'method ', 'HFX ',
                 'basis ', 'dispersion ', 'coordinates ', 'guess '
             ], [1] * 12,
             last_line=True)
        charge, spinmult = int(charge), int(spinmult)
        # Only the presence of a guess line is reported, not its value.
        guess = bool(guess)
        # A leading 'u' on the method name is dropped.
        if method[0] == 'u':
            method = method[1:]

        convergence_thresholds = inp.wordgrab([
            'min_converge_gmax ', 'min_converge_grms ', 'min_converge_dmax ',
            'min_converge_drms ', 'min_converge_e ', 'convthre '
        ], [1] * 6,
                                              last_line=True)
        if not convergence_thresholds[0]:
            convergence_thresholds = None

        # Lines strictly between '$multibasis' and '$end', if present.
        multibasis = inp.wordgrab(['$multibasis', '$end'], [0, 0],
                                  last_line=True,
                                  matching_index=True)
        if not multibasis[0]:
            multibasis = False
        else:
            multibasis = inp.lines[multibasis[0] + 1:multibasis[1]]

        # Lines strictly between '$constraint_freeze' and '$end', if present.
        constraints = inp.wordgrab(['$constraint_freeze', '$end'], [0, 0],
                                   last_line=True,
                                   matching_index=True)
        if not constraints[0]:
            constraints = False
        else:
            constraints = inp.lines[constraints[0] + 1:constraints[1]]

        if constraints and multibasis:
            raise Exception(
                'The current implementation of tools.read_infile() is known to behave poorly when an infile specifies both a multibasis and constraints'
            )

    elif qm_code == 'orca':
        (ligand_basis, run_type, method, parallel_environment, charge,
         spinmult, coordinates) = inp.wordgrab(
             ['! MULLIKEN'] * 3 + [r'%pal'] + [r'xyzfile'] * 3,
             [2, 3, 4, 2, 1, 2, 3],
             last_line=True)

        charge, spinmult = int(charge), int(spinmult)
        if run_type == 'opt':
            run_type = 'minimize'

        # Line indices of the optional '%scf', '%cpcm' and '%basis' blocks;
        # each setting is read from the line that follows the block header.
        levelshift, solvent, metal_basis = inp.wordgrab(
            [r'%scf', r'%cpcm', r'%basis'], [0] * 3,
            matching_index=True,
            last_line=True)

        if levelshift:
            levelshift = inp.lines[levelshift + 1].split()[2]
        if solvent:
            solvent = inp.lines[solvent + 1].split()[1]
        if metal_basis:
            # Third word of the line, without its first and last character.
            metal_basis = inp.lines[metal_basis + 1].split()[2][1:-1]

        levelshifta, levelshiftb = levelshift, levelshift
        if ligand_basis == '6-31G*' and metal_basis == 'LANL2DZ':
            basis = 'lacvps_ecp'
        else:
            # str() keeps the intended Exception from turning into a
            # TypeError when either value is not a string (e.g. no '%basis'
            # block was found).
            raise Exception(
                'read_infile() is unable to parse this basis set/ecp combo: ' +
                str(ligand_basis) + ' ' + str(metal_basis))

        # The following settings should not appear in a orca infile because
        # they are not specified in the write_input() functionality for orca
        hfx = None
        convergence_thresholds = None
        multibasis = None
        dispersion = None
        guess = None
        constraints = None

    return {
        'name': unique_job_name,
        'charge': charge,
        'spinmult': spinmult,
        'solvent': solvent,
        'run_type': run_type,
        'levelshifta': levelshifta,
        'levelshiftb': levelshiftb,
        'method': method,
        'hfx': hfx,
        'basis': basis,
        'convergence_thresholds': convergence_thresholds,
        'multibasis': multibasis,
        'constraints': constraints,
        'dispersion': dispersion,
        'coordinates': coordinates,
        'guess': guess,
        'qm_code': qm_code,
    }

Example #7

0

Show file

import os
import glob
import numpy as np
from molSimplify.job_manager.classes import textfile
from molSimplify.job_manager.manager_io import get_scf_progress


def bind_direct(this_run, jobname, basedir, case, keyinout, suffix=''):
    """Attach one value grabbed from an associated job's outfile to a run.

    The attribute ``case + suffix`` on ``this_run`` is set to ``False`` when
    the outfile ``<basedir>/<jobname>_<case>/<jobname>_<case>.out`` does not
    exist, to ``np.nan`` when it exists but nothing is grabbed, and to the
    grabbed value otherwise.

    Args:
        this_run: Object that receives the attribute.
        jobname: Name of the parent job.
        basedir: Directory that contains the associated job's folder.
        case: Name of the associated job; also the attribute name stem.
        keyinout: Pair of (keyword, index) passed to ``wordgrab``.
        suffix: Optional text appended to the attribute name.
    """
    case_attr = case + suffix
    setattr(this_run, case_attr, False)

    # The folder and the outfile share the same '<jobname>_<case>' stem.
    stem = "%s_%s" % (jobname, case)
    outfile = basedir + "/" + stem + "/" + stem + ".out"
    if not os.path.isfile(outfile):
        return

    setattr(this_run, case_attr, np.nan)
    output = textfile(outfile)
    v = output.wordgrab([keyinout[0]], [keyinout[1]], last_line=True)[0]
    # Identity test: values such as 0 or '' are still stored.
    if v is not None:
        setattr(this_run, case_attr, v)


def bind_with_search(this_run, jobname, basedir, case, keyinout, ref=False):
    setattr(this_run, case, False)
    search_dir = basedir + "/" + "%s_%s" % (jobname, case)
    if os.path.isdir(search_dir):
        setattr(this_run, case, dict())
        for dirpath, dirs, files in os.walk(search_dir):
            for file in sorted(files):
                if file.split('.')[-1] == 'out' and not any(
                        "_v%d.out" % x in file
                        for x in range(10)):  # search for outfiles
                    outfile = dirpath + '/' + file