def config_test_6(self):
    """Custom G_INPUT_TRIAL_EDS restricted to the logical ('L') descriptor.

    Currently skipped: the read is reported to hang (see the linked issue).
    """
    self.skipTest('hangs, issue logged at https://github.com/brendanarnold/py-fortranformat/issues/15')
    # Everything below only runs once the skip above is removed.
    config.G_INPUT_TRIAL_EDS = ['L']
    reader = FortranRecordReader('(G10.2)')
    self.assertEqual(reader.read(' 0'), [None])
def parse_resp_charges(f: TextIO) -> List[Charge]:
    """Parse a file in the ``resp`` charges format

    Every line is read with the Fortran format ``8F10.6``; fields the reader
    reports as ``None`` are dropped and the remaining values are wrapped in
    ``Charge`` in file order.

    Parameters
    ----------
    f : TextIO
        File object opened in read mode containing charges in the ``resp``
        format.

    Raises
    ------
    InputFormatError
        Raised when the file does not follow the expected format. The
        original ``ValueError`` is attached as ``__cause__``.

    Returns
    -------
    typing.List[Charge]
        List of charges described in the given input file.
    """
    formatter = FR("8F10.6")
    try:
        # A nested comprehension flattens the per-line lists in linear time
        # (repeated list concatenation is quadratic in the number of lines).
        return [
            Charge(value)
            for line in f
            for value in formatter.read(line)
            if value is not None
        ]
    except ValueError as e:
        # Chain the cause so the offending value stays visible in tracebacks.
        raise InputFormatError(e) from e
def config_test_2(self):
    '''Test case reported on float'''
    record = ' 1.0000 9.0000 105.09 1.0000 '
    expected = [1.0, 9.0, 105.09, 1.0, 0.0, 0.0]
    parsed = FortranRecordReader('(6g13.5)').read(record)
    # NOTE(review): the flag is switched on only after the record was read,
    # so it cannot influence `parsed` - confirm this ordering is intended.
    config.RET_UNWRITTEN_VARS_NONE = True
    self.assertEqual(parsed, expected)
def _readArray(self, fp, which, nelm, format):
    """Fill the first ``nelm`` slots of ``which`` from Fortran-formatted lines.

    Lines are pulled from ``fp`` one at a time and decoded with ``format``;
    each decoded chunk is copied into ``which`` until ``nelm`` values have
    been stored. Returns ``None``; ``which`` is modified in place.
    """
    reader = FortranRecordReader(format)
    filled = 0
    while filled < nelm - 1:
        values = reader.read(fp.readline())
        stop = min(filled + len(values), nelm)
        which[filled:stop] = values[:stop - filled]
        filled = stop
    # One slot may still be open after the loop; read a final line for it.
    if filled < nelm:
        values = reader.read(fp.readline())
        which[filled:] = values[:nelm - filled]
    return
def _readArray(self, fp, which, nelm, format):
    """Read ``nelm`` values from ``fp`` into ``which`` using ``format``.

    ``which`` is updated in place, line by line; nothing is returned.
    """
    parse = FortranRecordReader(format).read
    pos = 0
    while pos < nelm - 1:
        chunk = parse(fp.readline())
        nxt = min(pos + len(chunk), nelm)
        which[pos:nxt] = chunk[:nxt - pos]
        pos = nxt
    if pos >= nelm:
        return
    # Exactly the tail is still missing: take it from one more line.
    tail = parse(fp.readline())
    which[pos:] = tail[:nelm - pos]
    return
def test_6(self):
    '''Custom G_INPUT_TRIAL_EDS'''
    # With only the logical descriptor allowed, reading ' 0' must be rejected.
    config.G_INPUT_TRIAL_EDS = ['L']
    reader = FortranRecordReader('(G10.2)')
    with self.assertRaises(ValueError):
        reader.read(' 0')
def __init__(self, pattern, name=None, skip=None, strip_whitespace=True,
             map_values=None, after_read_hook=None):
    """Store the parsing options and build the reader for ``pattern``.

    A single integer ``skip`` is wrapped in a list; ``map_values`` is kept
    only when it is a dict, otherwise it is replaced by ``None``.
    """
    if isinstance(skip, int):
        skip = [skip]
    if not isinstance(map_values, dict):
        map_values = None
    self._reader = FortranRecordReader(pattern)
    self.name = name
    self._skip = skip
    self._strip_whitespace = strip_whitespace
    self._map_values = map_values
    self._after_read_hook = after_read_hook
def fort_read(fobj, formatstr, none_as=None, debug=False):
    """Read one Fortran-formatted record from a string or a file-like object.

    Parameters
    ----------
    fobj : str or file-like
        A string is parsed directly; anything else must provide
        ``readline()``, and one line is consumed from it.
    formatstr : str
        Format string handed to ``FortranRecordReader``.
    none_as : object, optional
        If not ``None``, every ``None`` in the result is replaced by it.
    debug : bool
        When true, print the source name, the format and the raw line.

    Returns
    -------
    list
        The values decoded from the record.
    """
    frr = FortranRecordReader(formatstr)
    if isinstance(fobj, str):
        fname = 'console'
        inpline = fobj
    else:
        # In-memory streams (e.g. io.StringIO) have no ``name`` attribute.
        fname = getattr(fobj, 'name', '<stream>')
        inpline = fobj.readline()
    res = frr.read(inpline)
    if none_as is not None:
        res = [none_as if x is None else x for x in res]
    if debug:
        print('--- reading ---')
        # ``name`` may be an integer file descriptor, hence str().
        print('file: ' + str(fname))
        print('fmt: ' + formatstr)
        print('str: ' + inpline)
    return res
def __init__(self, format, fields, fixed_fields=(), name=None,
             post_read_hook=None):
    """Set up reader/writer for ``format`` and an empty data dict.

    Every entry of ``fields`` that is not ``None`` becomes a key of
    ``self.data`` with the initial value ``None``.
    """
    self._fields = fields
    self._fixed_fields = tuple(fixed_fields)
    self._reader = FortranRecordReader(format)
    self._writer = FortranRecordWriter(format)
    self.name = name
    self.post_read_hook = post_read_hook
    self.data = {field: None for field in fields if field is not None}
class FortranLineParser(object):
    """Callable that decodes one line with a Fortran format and post-processes it.

    Post-processing steps, applied in this order when enabled: drop the
    positions listed in ``skip``, strip whitespace from string values, replace
    values found in ``map_values``, and finally hand the list to
    ``after_read_hook`` (whose return value becomes the result).
    """

    def __init__(self, pattern, name=None, skip=None, strip_whitespace=True,
                 map_values=None, after_read_hook=None):
        if isinstance(skip, int):
            skip = [skip]  # a single index is treated as a one-element list
        if not isinstance(map_values, dict):
            map_values = None
        self._reader = FortranRecordReader(pattern)
        self.name = name
        self._skip = skip
        self._strip_whitespace = strip_whitespace
        self._map_values = map_values
        self._after_read_hook = after_read_hook

    def __call__(self, line):
        values = self._reader.read(line)
        dropped = self._skip
        if dropped:
            values = [v for pos, v in enumerate(values) if pos not in dropped]
        if self._strip_whitespace:
            values = [v.strip() if isinstance(v, str) else v for v in values]
        lookup = self._map_values
        if lookup:
            values = [lookup.get(v, v) for v in values]
        hook = self._after_read_hook
        if hook:
            values = hook(values)
        return values
def _fortranRead(self, stream, format):
    """Decode ``stream`` with a reader built from ``format`` and return the values."""
    return FortranRecordReader(format).read(stream)
class Parser(object):
    """Download radiosonde soundings from the NOAA RAOB service and parse them.

    The response is requested in "FSL format (ASCII text)". Each sounding
    starts with a line whose first field is 254; the first field of every
    following line selects how that line is decoded (1, 2, 3 = header
    records, 4..9 = data levels).
    """

    # "&ltype=" had been mangled into "<ype=" (HTML-unescaped "&lt").
    URLTEMPLATE = "https://ruc.noaa.gov/raobs/GetRaobs.cgi?shour=All+Times&ltype=All+Levels&wunits=Tenths+of+Meters&bdate={bdate}&edate={edate}&access=WMO+Station+Identifier&view=NO&StationIDs={stationID}&osort=Station+Series+Sort&oformat=FSL+format+(ASCII+text)"
    HEADER = FortranRecordReader("(3i7,6x,a4,i7)")
    IDENT = FortranRecordReader("(3i7,f7.2,a1,f6.2,a1,i6,i7)")
    IDENT2 = FortranRecordReader("(i7,10x,a4,14x,i7,5x,a2)")
    VALS = FortranRecordReader("(7i7)")
    LINEIDENT = FortranRecordReader("(i7)")
    SPLIT_REGEX = re.compile("^ 254", re.M)

    def __init__(self, bdate, edate, stationID):
        """Remember the query parameters and build the request URL."""
        self.bdate = bdate
        self.edate = edate
        self.stationID = stationID
        # Was stored as ``self.ur``, which made ``parse`` fail on ``self.url``.
        self.url = self.URLTEMPLATE.format(bdate=self.bdate,
                                           edate=self.edate,
                                           stationID=self.stationID)
        self.soundings = []

    def parse(self):
        """Fetch the data and append one ``Sounding`` per record to ``self.soundings``.

        Prints the server response and returns early when the response text
        contains "Sorry" or "ERROR".

        Raises
        ------
        ValueError
            If a line starts with a record code that is not 254 or 1..9.
        """
        req = requests.get(self.url)
        if "Sorry" in req.text or "ERROR" in req.text:
            print("ERROR, Something is wrong in your request")
            print(req.text)
            return
        # Splitting consumes the 254 marker; put it back on every chunk and
        # drop whatever preceded the first sounding.
        content = [
            " 254" + x for x in re.split(self.SPLIT_REGEX, req.text)
        ][1:]
        for snd in content:
            snd_obj = Sounding()
            data = []
            for line in snd.splitlines():
                if not line.strip():
                    # Blank lines carry no record code to dispatch on.
                    continue
                code = self.LINEIDENT.read(line)[0]
                if code == 254:
                    header = self.HEADER.read(line)
                    try:
                        date = datetime.datetime.strptime(
                            "{}-{}-{} {}:00:00".format(header[4],
                                                       header[3].strip(),
                                                       header[2], header[1]),
                            "%Y-%b-%d %H:%M:%S")
                    except ValueError:
                        print(
                            "Something has gone wrong with datetime parsing...."
                        )
                        # Never reuse a stale/unbound value after a failure.
                        date = None
                    snd_obj.datetime = date
                elif code == 1:
                    ident = self.IDENT.read(line)
                    (snd_obj.wban, snd_obj.wmo, snd_obj.lat, snd_obj.lat_dir,
                     snd_obj.lon, snd_obj.lon_dir, snd_obj.elev,
                     snd_obj.rtime) = (ident[1], ident[2], ident[3], ident[4],
                                       ident[5], ident[6], ident[7], ident[8])
                    print(snd_obj.lon, snd_obj.lat)
                elif code == 2:
                    checks = self.VALS.read(line)
                    (snd_obj.hydro, snd_obj.mxwd, snd_obj.tropl,
                     snd_obj.lines, snd_obj.tindex,
                     snd_obj.source) = (checks[1], checks[2], checks[3],
                                        checks[4], checks[5], checks[6])
                elif code == 3:
                    ident2 = self.IDENT2.read(line)
                    snd_obj.staid, snd_obj.sonde, snd_obj.wsunits = (
                        ident2[1], ident2[2], ident2[3])
                elif code is not None and 4 <= code <= 9:
                    data.append(self.VALS.read(line))
                else:
                    raise ValueError(
                        "Unexpected record code {!r} in line {!r}".format(
                            code, line))
            snd_obj.data = pd.DataFrame(data=data, columns=Sounding.COLUMNS)
            # 99999 is replaced by NaN in the data table.
            snd_obj.data = snd_obj.data.replace(99999, np.nan)
            self.soundings.append(snd_obj)
def import_qeinput(fname):
    """
    This function imports a AiiDA structure from a Quantum ESPRESSO
    input file.

    The file is scanned line by line for ``nat``, ``ntyp``, ``ibrav``,
    ``celldm(i)``, ``A``/``B``/``C``/``cosAB``/``cosAC``/``cosBC`` and for the
    ``CELL_PARAMETERS``, ``ATOMIC_SPECIES`` and ``ATOMIC_POSITIONS`` cards.
    Text after ``!`` is ignored everywhere; inside cards text after ``#`` is
    ignored as well.

    :param fname: the file name that should be read
    :return: a ``StructureData`` with the cell, kinds and sites found
    :raise Exception: on conflicting cell definitions, an unknown ``ibrav``,
        a truncated card, or missing species/positions
    """
    import re

    import numpy as np
    from fortranformat import FortranRecordReader

    import aiida.orm.data.structure as struct

    bohr = 0.52917720859
    one_over_bohr = 1.0 / bohr
    cell_types = ["alat", "bohr", "angstrom"]
    pos_types = ["alat", "bohr", "angstrom", "crystal"]
    both_declared = (
        "Cannot declare both celldm and A, B, C, cosAB, cosAC, cosBC (a={}, celldm={})"
    )

    def generate_cell(ibrav, parameters):
        """Return the 3x3 cell for ``ibrav``, scaled by parameters[0] * bohr."""
        # NOTE(review): ibrav 9, -9 and 10 use parameters[1] in the third row
        # where one might expect parameters[2]; kept unchanged - verify
        # against the pw.x input description.
        p = parameters
        sqrt, cos, sin, arccos = np.sqrt, np.cos, np.sin, np.arccos
        if ibrav == 1:
            rows = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
        elif ibrav == 2:
            rows = [[-0.5, 0, 0.5], [0, 0.5, 0.5], [-0.5, 0.5, 0]]
        elif ibrav == 3:
            rows = [[0.5, 0.5, 0.5], [-0.5, 0.5, 0.5], [-0.5, -0.5, 0.5]]
        elif ibrav == 4:
            rows = [[1, 0, 0], [-0.5, sqrt(3) / 2, 0], [0, 0, p[2]]]
        elif ibrav in (5, -5):
            tx = sqrt((1 - p[3]) / 2)
            ty = sqrt((1 - p[3]) / 6)
            tz = sqrt((1 + 2 * p[3]) / 3)
            if ibrav == 5:
                rows = [[tx, -ty, tz], [0, 2 * ty, tz], [-tx, -ty, tz]]
            else:
                u = tz - 2.0 * sqrt(2.0) * ty
                v = tz + sqrt(2.0) * ty
                a1 = 1 / sqrt(3.0)
                rows = [[a1 * u, a1 * v, a1 * v],
                        [a1 * v, a1 * u, a1 * v],
                        [a1 * v, a1 * v, a1 * u]]
        elif ibrav == 6:
            rows = [[1, 0, 0], [0, 1, 0], [0, 0, p[2]]]
        elif ibrav == 7:
            rows = [[0.5, -0.5, p[2] / 2],
                    [0.5, 0.5, p[2] / 2],
                    [-0.5, -0.5, p[2] / 2]]
        elif ibrav == 8:
            rows = [[1, 0, 0], [0, p[1], 0], [0, 0, p[2]]]
        elif ibrav == 9:
            rows = [[0.5, p[1] / 2, 0], [-0.5, p[1] / 2, 0], [0, 0, p[1]]]
        elif ibrav == -9:
            rows = [[0.5, -p[1] / 2, 0], [0.5, -p[1] / 2, 0], [0, 0, p[1]]]
        elif ibrav == 10:
            rows = [[0.5, 0, p[2] / 2],
                    [0.5, p[1] / 2, 0],
                    [0, p[1] / 2, p[1] / 2]]
        elif ibrav == 11:
            rows = [[0.5, p[1] / 2, p[2] / 2],
                    [-0.5, p[1] / 2, p[2] / 2],
                    [-0.5, -p[1] / 2, p[2] / 2]]
        elif ibrav == 12:
            gamma = arccos(p[3])
            rows = [[1, 0, 0],
                    [p[1] * cos(gamma), p[1] * sin(gamma), 0],
                    [0, 0, p[2]]]
        elif ibrav == -12:
            beta = arccos(p[4])
            rows = [[1, 0, 0],
                    [0, p[1], 0],
                    [p[2] * sin(beta), 0, p[2] * cos(beta)]]
        elif ibrav == 13:
            gamma = arccos(p[3])
            rows = [[0.5, 0, -p[2] / 2],
                    [p[1] * cos(gamma), p[1] * sin(gamma), 0],
                    [0.5, 0, p[2] / 2]]
        elif ibrav == 14:
            alpha = arccos(p[3])
            beta = arccos(p[4])
            gamma = arccos(p[5])
            rows = [
                [1, 0, 0],
                [p[1] * cos(gamma), p[1] * sin(gamma), 0],
                [p[2] * cos(beta),
                 p[2] * (cos(alpha) - cos(beta) * cos(gamma)) / (sin(gamma)),
                 p[2] * sqrt(1 + 2 * cos(alpha) * cos(beta) * cos(gamma) -
                             (cos(beta) * cos(beta)) -
                             (cos(alpha) * cos(alpha)) -
                             (cos(gamma) * cos(gamma))) / (sin(gamma))],
            ]
        else:
            raise Exception("ibrav [{0}] is not defined !".format(ibrav))
        return np.array(rows, dtype=float) * p[0] * bohr

    def get_pos(pos, coord_type, alat, cell):
        """Convert one position according to the coordinate type of the card."""
        if coord_type == pos_types[0]:
            return np.array(pos) * alat * bohr
        if coord_type == pos_types[1]:
            return np.array(pos) * alat
        if coord_type == pos_types[3]:
            return np.dot(np.array(pos), np.array(cell))
        return pos

    def sanitize(line_raw, sec=None):
        """Cut the line at ``!`` and, if given, at the secondary marker ``sec``."""
        if sec is not None:
            return line_raw.split("!")[0].split(sec)[0]
        return line_raw.split("!")[0]

    def next_data_line(handle):
        """Return the next line that is non-blank once comments are removed."""
        while True:
            raw = handle.readline()
            if not raw:
                # readline() returns '' forever at EOF; fail instead of looping.
                raise Exception(
                    "Unexpected end of file while reading '{}'".format(fname))
            text = sanitize(raw, sec="#")
            if text.strip():
                return text

    def to_float(text):
        """Convert a number that may use a Fortran ``d``/``D`` exponent."""
        return float(text.replace("d", "e").replace("D", "E"))

    # Decimal number with optional exponent; the previous pattern matched
    # only the integer part (e.g. "5.43" was read as 5).
    number = r'(-?(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[dDeE][-+]?[0-9]+)?)'

    def scalar_search(key):
        """Compile the ``key = <number>`` pattern; the value is group(3)."""
        return re.compile(
            r'(?:^|[^A-Za-z0-9_])' + key + r'([\t ]+)?=([\t ]+)?' + number,
            re.IGNORECASE)

    nat = 0
    ntyp = 0
    ibrav = 0
    celldm = None
    cell = None
    atomic_kinds = None
    atomic_kindnames = None
    atomic_pos = None
    pos_type = None
    lattice = dict.fromkeys(("a", "b", "c", "cosab", "cosac", "cosbc"))

    # In all of these the value of interest is group(3).
    nat_search = re.compile(
        r'(?:^|[^A-Za-z0-9_])nat([\t ]+)?=([\t ]+)?([0-9]+)', re.IGNORECASE)
    ntyp_search = re.compile(
        r'(?:^|[^A-Za-z0-9_])ntyp([\t ]+)?=([\t ]+)?([0-9]+)', re.IGNORECASE)
    ibrav_search = re.compile(
        r'(?:^|[^A-Za-z0-9_])ibrav([\t ]+)?=([\t ]+)?(-?[0-9]+)',
        re.IGNORECASE)
    # Here group(3) is the index and group(7) the value.
    celldm_search = re.compile(
        r'(?:^|[^A-Za-z0-9_])celldm([\t ]+)?\(([\t ]+)?([0-9]+)([\t ]+)?\)([\t ]+)?=([\t ]+)?([0-9\.DdeE-]*)',
        re.IGNORECASE)
    lattice_search = {key: scalar_search(key) for key in lattice}
    atomic_species = re.compile(r'ATOMIC_SPECIES', re.IGNORECASE)
    atomic_position = re.compile(r'ATOMIC_POSITIONS', re.IGNORECASE)
    cell_parameters = re.compile(r'CELL_PARAMETERS', re.IGNORECASE)

    ff_float = FortranRecordReader('F15.9')

    # "rw+" is not a valid mode; the file is only read, and ``with`` closes it.
    with open(fname, "r") as f:
        while True:
            raw = f.readline()
            if not raw:
                break
            # Test the raw line for EOF: a comment-only line sanitizes to ''
            # and must not terminate the scan.
            line = sanitize(raw)

            found = nat_search.search(line)
            if found:
                nat = int(found.group(3))
            found = ntyp_search.search(line)
            if found:
                ntyp = int(found.group(3))
            found = ibrav_search.search(line)
            if found:
                ibrav = int(found.group(3))

            if celldm_search.search(line) and cell is None:
                if celldm is None:
                    celldm = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
                # A dedicated loop name: the old loop variable ``c`` shadowed
                # the lattice parameter C.
                for groups in celldm_search.findall(line):
                    celldm[int(groups[2]) - 1] = ff_float.read(groups[6])[0]

            for key, pattern in lattice_search.items():
                found = pattern.search(line)
                if found:
                    lattice[key] = to_float(found.group(3))

            if cell_parameters.search(
                    line) and nat > 0 and ibrav == 0 and cell is None:
                a = lattice["a"]
                if celldm is not None and a is not None:
                    raise Exception(both_declared.format(a, celldm))
                if a is not None:
                    alat = a
                else:
                    if celldm is None:
                        celldm = [one_over_bohr, 0.0, 0.0, 0.0, 0.0, 0.0]
                    alat = celldm[0]
                cell_type = cell_types[0]  # alat unless the card says otherwise
                for ct in cell_types:
                    if ct in line.lower():
                        cell_type = ct
                cell = np.zeros((3, 3))
                for cell_ax in range(3):
                    line = next_data_line(f)
                    cell_ax_val = np.array(
                        [ff_float.read(p)[0] for p in line.strip().split()])
                    if cell_type == cell_types[0]:
                        cell_ax_val *= alat * bohr
                    if cell_type == cell_types[1]:
                        cell_ax_val *= alat
                    cell[cell_ax] = cell_ax_val

            if atomic_species.search(line) and ntyp > 0:
                atomic_kinds = []
                for _ in range(ntyp):
                    line = next_data_line(f)
                    p = line.strip().split()
                    atomic_kinds.append(
                        struct.Kind(symbols=p[0],
                                    weights=1.0,
                                    name=p[0],
                                    mass=float(p[1])))

            if atomic_position.search(line) and nat > 0:
                pos_type = "alat"
                for pt in pos_types:
                    if pt in line.lower():
                        pos_type = pt
                atomic_pos = []
                atomic_kindnames = []
                for _ in range(nat):
                    line = next_data_line(f)
                    p = line.strip().split()
                    atomic_kindnames.append(p[0])
                    atomic_pos.append(
                        [ff_float.read(p[i])[0] for i in range(1, 4)])

    if atomic_kinds is None or atomic_pos is None:
        raise Exception(
            "ATOMIC_SPECIES and/or ATOMIC_POSITIONS not found in '{}'".format(
                fname))

    # Cell generation
    a, b, c = lattice["a"], lattice["b"], lattice["c"]
    cosab, cosac, cosbc = lattice["cosab"], lattice["cosac"], lattice["cosbc"]
    if celldm is not None and a is not None:
        raise Exception(both_declared.format(a, celldm))
    # All six values are required (cosab used to be left unchecked).
    if cell is None and all(v is not None for v in lattice.values()):
        celldm = [a, b / a, c / a, cosab, cosac, cosbc, 0.0]
        cell = generate_cell(ibrav, celldm)
    # ``!= 0`` so the negative lattices handled by generate_cell are reachable.
    if celldm is not None and ibrav != 0 and cell is None:
        cell = generate_cell(ibrav, celldm)

    # Positions generation
    atomic_pos = [
        get_pos(pos, pos_type, celldm[0], cell) for pos in atomic_pos
    ]

    the_struc = struct.StructureData(cell=cell, pbc=True)
    for k in atomic_kinds:
        the_struc.append_kind(k)
    for kindname, pos in zip(atomic_kindnames, atomic_pos):
        the_struc.append_site(struct.Site(kind_name=kindname, position=pos))
    return the_struc
import pandas as pd
from fortranformat import FortranRecordReader
from collections import namedtuple

# Converts a fixed-width mass table into CSV: every input line is decoded
# with the Fortran format below and becomes one row of the output table.
fline = FortranRecordReader('(a1,i3,i5,i5,i5,1x,a3,a4,1x,f13.5,f11.5,f11.3,f9.3,1x,a2,f11.3,f9.3,1x,i3,1x,f12.5,f11.5)')

filename = "mass16.txt"
# filename="mass.mas12"
df_cols = ["NZ", "N", "Z", "A", "el", "o", "massexcess", "uncmassex", "binding", "uncbind", "B", "beta", "uncbeta", "am_int", "am_float", "uncatmass"]
# One name per decoded field; the leading 'cc' field is not exported.
record = namedtuple('nucleo', 'cc NZ N Z A el o massexcess uncmassex binding uncbind B beta uncbeta am_int am_float uncatmass')

print("converting...")
rows = []
# ``with`` closes the input file; rows are collected first because growing a
# DataFrame row by row is quadratic and DataFrame.append no longer exists in
# pandas 2.x.
with open(filename, 'r') as f:
    for line in f:
        nucl = record._make(fline.read(line))
        rows.append([getattr(nucl, col) for col in df_cols])

# dtype=object keeps each value exactly as decoded (no int -> float
# promotion in columns that contain missing values).
df = pd.DataFrame(rows, columns=df_cols, dtype=object)
print(df)
df.to_csv("ame2016.csv")
class DataCard:
    """ Class to implement a line of generalized ATP/Fortran style input records

        format is a format string suitable for the fortranformat module.
        fields is a list of field names for indexing the data dict. Field names
            will usually be strings, but could be integers or floats in the
            case of matching fixed_fields.
        fixed_fields is an iterable of field indices that have fixed values.
            The expected value for the field should be the field name in the
            fields list.
        post_read_hook is an optional parameter indicating a function to be
            called after reading lines into the DataCard.
        Data in the line is internally represented using a dict.

        format and fields should not be changed after initialization.

        Reads only one line, but should be passed an iterable of lines.
    """

    def __init__(self, format, fields, fixed_fields=(), name=None,
                 post_read_hook=None):
        self._fields = fields
        self._fixed_fields = tuple(fixed_fields)
        self._reader = FortranRecordReader(format)
        self._writer = FortranRecordWriter(format)
        self.name = name
        self.post_read_hook = post_read_hook
        # Every named field starts out empty; None entries are unnamed.
        self.data = {f: None for f in fields if f is not None}

    def read(self, lines):
        """ Read in datalines with validation prior to populating data. """
        if not self.match(lines):
            # This should raise an exception and will help
            # identify where in the stack the exception occurred.
            tmp = copy.deepcopy(self)
            tmp._read(lines)
        self._read(lines)

    def _read(self, lines):
        """ Decode lines[0] into self.data and run the post-read hook.

            Raises ValueError if a fixed field does not hold its expected
            value. Returns self.
        """
        line = lines[0]
        data = self._reader.read(line)
        for f in self._fixed_fields:
            if data[f] != self._fields[f]:
                # Format instead of concatenating: fixed values may be
                # numbers or None, and match() relies on ValueError here.
                raise ValueError(
                    'Fixed field with wrong value: {}/{}'.format(
                        data[f], self._fields[f]))

        for f, d in zip(self._fields, data):
            if f is not None:
                self.data[f] = d

        if self.post_read_hook is not None:
            self.post_read_hook(self)

        return self

    def write(self):
        """ Return the card formatted as text; unnamed fields are written as None. """
        data = [self.data[f] if f is not None else None for f in self._fields]
        return self._writer.write(data)

    def match(self, lines):
        """ Checks if text lines match record type. Does not modify card data.
        """
        # Work on a copy so a failed or successful trial read leaves this
        # card untouched.
        tmp = copy.deepcopy(self)
        try:
            tmp._read(lines)
        except ValueError:
            return False
        return True

    def num_lines(self):
        """ Number of text lines this card occupies (always 1). """
        return 1
def config_test_2(self):
    '''Default G_INPUT_TRIAL_EDS'''
    # A G descriptor should recognise the logical literal and return True.
    reader = FortranRecordReader('(G10.2)')
    self.assertEqual(reader.read(' .T.'), [True])
class FortranFormat(object):
    """
    Processes Fortran format strings according to the Fortran specification for
    such formats. This object handles reading and writing data with any valid
    Fortran format. It does this by using the `fortranformat` project
    [https://bitbucket.org/brendanarnold/py-fortranformat].

    However, while `fortranformat` is very general and adheres well to the
    standard, it is very slow. As a result, simple, common format strings have
    been optimized and processes reads and writes between 3 and 5 times faster.
    The format strings (case-insensitive) of the following form (where # can be
    replaced by any number) are optimized:
        - #E#.#
        - #D#.#
        - #F#.#
        - #(F#.#)
        - #a#
        - #I#

    Parameters
    ----------
    format_string : str
        The Fortran Format string to process
    strip_strings : bool=True
        If True, string fields read with an ``#a#`` format have their leading
        and trailing whitespace removed
    """

    strre = re.compile(r'(\d+)?a(\d+)$', re.I)
    intre = re.compile(r'(\d+)?i(\d+)$', re.I)
    floatre = re.compile(r'(\d+)?[edf](\d+)\.(\d+)$', re.I)
    floatre2 = re.compile(r'(\d+)?\([edf](\d+)\.(\d+)\)$', re.I)

    #===================================================

    def __init__(self, format_string, strip_strings=True):
        """
        Sets the format string and determines how we will read and write
        strings using this format
        """
        self.format = format_string
        self.strip_strings = strip_strings  # for ease of copying

        # Applied to every value that was read. Identity by default; replaced
        # below for string formats when stripping is requested.
        self.process_method = lambda x: x

        str_match = FortranFormat.strre.match(format_string)
        int_match = FortranFormat.intre.match(format_string)
        float_match = (FortranFormat.floatre.match(format_string) or
                       FortranFormat.floatre2.match(format_string))
        if str_match:
            # replace our write() method with _write_string to force
            # left-justification
            self.type, self.write = str, self._write_string
            self._set_layout(*str_match.groups())
            self.fmt = '%s'
            if strip_strings:
                self.process_method = lambda x: x.strip()
        elif int_match:
            self.type = int
            self._set_layout(*int_match.groups())
            self.fmt = '%%%dd' % self.itemlen
        elif float_match:
            self.type = float
            nitems, itemlen, num_decimals = float_match.groups()
            self._set_layout(nitems, itemlen)
            self.num_decimals = int(num_decimals)
            # F formats print fixed-point; E and D print with an exponent.
            kind = 'F' if 'F' in format_string.upper() else 'E'
            self.fmt = '%%%s.%s%s' % (self.itemlen, self.num_decimals, kind)
        else:
            # We tried... now just use the fortranformat package
            self._reader = FortranRecordReader(format_string)
            self._writer = FortranRecordWriter(format_string)
            self.write = self._write_ffwriter
            self.read = self._read_ffreader

    def _set_layout(self, nitems, itemlen):
        """ Stores items-per-line (1 if not given) and the field width """
        self.nitems = 1 if nitems is None else int(nitems)
        self.itemlen = int(itemlen)

    #===================================================

    def __copy__(self):
        return type(self)(self.format, self.strip_strings)

    #===================================================

    def __str__(self):
        return self.format

    #===================================================

    def _write_records(self, items, dest, render):
        """
        Writes render(item) for every item, nitems per line, always ending
        with a newline. A single non-iterable value (or a string) is written
        on a line of its own; an empty iterable produces one empty line.
        """
        if hasattr(items, '__iter__') and not isinstance(items, basestring):
            count = 0
            for count, item in enumerate(items, 1):
                dest.write(render(item))
                if count % self.nitems == 0:
                    dest.write('\n')
            # Terminate a partially filled (or completely empty) last line.
            if count == 0 or count % self.nitems != 0:
                dest.write('\n')
        else:
            # Scalar case: the value is ``items`` itself.
            dest.write(render(items))
            dest.write('\n')

    def write(self, items, dest):
        """
        Writes an iterable of data (or a single item) to the passed file-like
        object

        Parameters
        ----------
        items : iterable or single float/str/int
            These are the objects to write in this format. The types of each
            item should match the type specified in this Format for that
            argument
        dest : file or file-like
            This is the file to write the data to. It must have a `write` method
            or an AttributeError will be raised

        Notes
        -----
        This method may be replaced with _write_string (for #a#-style formats)
        or _write_ffwriter in the class initializer if no optimization is
        provided for this format, but the call signatures and behavior are the
        same for each of those functions.
        """
        self._write_records(items, dest, lambda item: self.fmt % item)

    #===================================================

    def _write_string(self, items, dest):
        """ Writes a list/tuple of strings, left-justified in their fields """
        self._write_records(
            items, dest, lambda item: (self.fmt % item).ljust(self.itemlen))

    #===================================================

    def _split_fields(self, line):
        """ Cuts line into itemlen-wide fields and converts each of them """
        width = self.itemlen
        # Integer ceiling division: a short trailing field still counts and
        # the result does not depend on true-division semantics.
        nitems = (len(line) + width - 1) // width
        return [self.process_method(self.type(line[k * width:(k + 1) * width]))
                for k in range(nitems)]

    def _read_nostrip(self, line):
        """
        Reads the line and returns converted data. Special-cased for flags that
        may contain 'blank' data. Only the trailing newline is removed.
        """
        return self._split_fields(line.rstrip('\n'))

    #===================================================

    def read(self, line):
        """ Reads the line and returns the converted data """
        return self._split_fields(line.rstrip())

    #===================================================

    def _read_ffreader(self, line):
        """ Reads the line and returns the converted data """
        return self._reader.read(line.rstrip())

    #===================================================

    def _write_ffwriter(self, items, dest):
        """ Writes items through the fortranformat writer plus a newline """
        dest.write('%s\n' % self._writer.write(items))
class FortranFormat(object):
    """
    Processes Fortran format strings according to the Fortran specification for
    such formats. This object handles reading and writing data with any valid
    Fortran format. It does this by using the `fortranformat` project
    [https://bitbucket.org/brendanarnold/py-fortranformat].

    However, while `fortranformat` is very general and adheres well to the
    standard, it is very slow. As a result, simple, common format strings have
    been optimized and processes reads and writes between 3 and 5 times faster.
    The format strings (case-insensitive) of the following form (where # can be
    replaced by any number) are optimized:
        - #E#.#
        - #D#.#
        - #F#.#
        - #(F#.#)
        - #a#
        - #I#

    Parameters
    ----------
    format_string : str
        The Fortran Format string to process
    strip_strings : bool=True
        If True, string fields read with an ``#a#`` format have their leading
        and trailing whitespace removed
    """

    strre = re.compile(r'(\d+)?a(\d+)$', re.I)
    intre = re.compile(r'(\d+)?i(\d+)$', re.I)
    floatre = re.compile(r'(\d+)?[edf](\d+)\.(\d+)$', re.I)
    floatre2 = re.compile(r'(\d+)?\([edf](\d+)\.(\d+)\)$', re.I)

    #===================================================

    def __init__(self, format_string, strip_strings=True):
        """
        Sets the format string and determines how we will read and write
        strings using this format
        """
        self.format = format_string
        self.strip_strings = strip_strings  # for ease of copying

        # Applied to every value that was read. Identity by default; replaced
        # below for string formats when stripping is requested.
        self.process_method = lambda x: x

        str_match = FortranFormat.strre.match(format_string)
        int_match = FortranFormat.intre.match(format_string)
        float_match = (FortranFormat.floatre.match(format_string) or
                       FortranFormat.floatre2.match(format_string))
        if str_match:
            # replace our write() method with _write_string to force
            # left-justification
            self.type, self.write = str, self._write_string
            self._set_layout(*str_match.groups())
            self.fmt = '%s'
            if strip_strings:
                self.process_method = lambda x: x.strip()
        elif int_match:
            self.type = int
            self._set_layout(*int_match.groups())
            self.fmt = '%%%dd' % self.itemlen
        elif float_match:
            self.type = float
            nitems, itemlen, num_decimals = float_match.groups()
            self._set_layout(nitems, itemlen)
            self.num_decimals = int(num_decimals)
            # F formats print fixed-point; E and D print with an exponent.
            kind = 'F' if 'F' in format_string.upper() else 'E'
            self.fmt = '%%%s.%s%s' % (self.itemlen, self.num_decimals, kind)
        else:
            # We tried... now just use the fortranformat package
            self._reader = FortranRecordReader(format_string)
            self._writer = FortranRecordWriter(format_string)
            self.write = self._write_ffwriter
            self.read = self._read_ffreader

    def _set_layout(self, nitems, itemlen):
        """ Stores items-per-line (1 if not given) and the field width """
        self.nitems = 1 if nitems is None else int(nitems)
        self.itemlen = int(itemlen)

    #===================================================

    def __copy__(self):
        return type(self)(self.format, self.strip_strings)

    #===================================================

    def __str__(self):
        return self.format

    #===================================================

    def _write_records(self, items, dest, render):
        """
        Writes render(item) for every item, nitems per line, always ending
        with a newline. A single non-iterable value (or a string) is written
        on a line of its own; an empty iterable produces one empty line.
        """
        if hasattr(items, '__iter__') and not isinstance(items, string_types):
            count = 0
            for count, item in enumerate(items, 1):
                dest.write(render(item))
                if count % self.nitems == 0:
                    dest.write('\n')
            # Terminate a partially filled (or completely empty) last line.
            if count == 0 or count % self.nitems != 0:
                dest.write('\n')
        else:
            # Scalar case: the value is ``items`` itself.
            dest.write(render(items))
            dest.write('\n')

    def write(self, items, dest):
        """
        Writes an iterable of data (or a single item) to the passed file-like
        object

        Parameters
        ----------
        items : iterable or single float/str/int
            These are the objects to write in this format. The types of each
            item should match the type specified in this Format for that
            argument
        dest : file or file-like
            This is the file to write the data to. It must have a `write` method
            or an AttributeError will be raised

        Notes
        -----
        This method may be replaced with _write_string (for #a#-style formats)
        or _write_ffwriter in the class initializer if no optimization is
        provided for this format, but the call signatures and behavior are the
        same for each of those functions.
        """
        self._write_records(items, dest, lambda item: self.fmt % item)

    #===================================================

    def _write_string(self, items, dest):
        """ Writes a list/tuple of strings, left-justified in their fields """
        self._write_records(
            items, dest, lambda item: (self.fmt % item).ljust(self.itemlen))

    #===================================================

    def _split_fields(self, line):
        """ Cuts line into itemlen-wide fields and converts each of them """
        width = self.itemlen
        # Integer ceiling division: a short trailing field still counts.
        nitems = (len(line) + width - 1) // width
        return [self.process_method(self.type(line[k * width:(k + 1) * width]))
                for k in range(nitems)]

    def _read_nostrip(self, line):
        """
        Reads the line and returns converted data. Special-cased for flags that
        may contain 'blank' data. Only the trailing newline is removed.
        """
        return self._split_fields(line.rstrip('\n'))

    #===================================================

    def read(self, line):
        """ Reads the line and returns the converted data """
        return self._split_fields(line.rstrip())

    #===================================================

    def _read_ffreader(self, line):
        """ Reads the line and returns the converted data """
        return self._reader.read(line.rstrip())

    #===================================================

    def _write_ffwriter(self, items, dest):
        """ Writes items through the fortranformat writer plus a newline """
        dest.write('%s\n' % self._writer.write(items))
def test_5(self):
    """With G_INPUT_TRIAL_EDS set to ['A'], a G10.2 read returns the text."""
    config.G_INPUT_TRIAL_EDS = ['A']
    expected = [' 0']
    reader = FortranRecordReader('(G10.2)')
    self.assertEqual(reader.read(' 0'), expected)
def test_4(self):
    """With the configuration left untouched, G10.2 returns ' STR' as text."""
    expected = [' STR']
    reader = FortranRecordReader('(G10.2)')
    self.assertEqual(reader.read(' STR'), expected)
def test_3(self):
    """With the configuration left untouched, G10.2 reads ' .F.' as False."""
    expected = [False]
    reader = FortranRecordReader('(G10.2)')
    self.assertEqual(reader.read(' .F.'), expected)
def __init__(self, format_string, strip_strings=True):
    """
    Stores the format string and works out how data in this format is read
    and written

    The format string is tried against the class-level patterns strre,
    intre, floatre and floatre2, in that order. The first one that matches
    fixes the item type, the number of items per line (1 when the format
    gives no repeat count), the field width and the %-style template used
    for writing. When none matches, reading and writing are both delegated
    to the fortranformat package.

    Parameters
    ----------
    format_string : str
        The Fortran format to interpret
    strip_strings : bool
        For string formats, whether whitespace is stripped from every string
        that is read
    """
    self.format = format_string
    self.strip_strings = strip_strings # kept so the object is easy to copy
    # Hook applied to every value before it is added to the returned list.
    # It is the identity unless strings are to be stripped.
    self.process_method = lambda x: x

    found = FortranFormat.strre.match(format_string)
    if found:
        # Strings are written through _write_string to force left-justify
        self.type, self.write = str, self._write_string
        count, width = found.groups()
        self.nitems = 1 if count is None else int(count)
        self.itemlen = int(width)
        self.fmt = '%s'
        if strip_strings:
            self.process_method = lambda x: x.strip()
        return

    found = FortranFormat.intre.match(format_string)
    if found:
        self.type = int
        count, width = found.groups()
        self.nitems = 1 if count is None else int(count)
        self.itemlen = int(width)
        self.fmt = '%%%dd' % self.itemlen
        return

    # Both float patterns are handled the same way; the second one is only
    # consulted when the first does not match
    found = (FortranFormat.floatre.match(format_string) or
             FortranFormat.floatre2.match(format_string))
    if found:
        self.type = float
        count, width, decimals = found.groups()
        self.nitems = 1 if count is None else int(count)
        self.itemlen = int(width)
        self.num_decimals = int(decimals)
        if 'F' in format_string.upper():
            template = '%%%s.%sF'
        else:
            template = '%%%s.%sE'
        self.fmt = template % (self.itemlen, self.num_decimals)
        return

    # No pattern matched... hand everything to the fortranformat package
    self._reader = FortranRecordReader(format_string)
    self._writer = FortranRecordWriter(format_string)
    self.write = self._write_ffwriter
    self.read = self._read_ffreader
def config_test_1(self):
    """Without touching the configuration, the unfilled third field is None."""
    expected = [0.0, 0.0, None]
    reader = FortranRecordReader('(3G10.2)')
    self.assertEqual(reader.read(' 0 0'), expected)
def config_test_3(self):
    """RET_UNWRITTEN_VARS_NONE = False: the unfilled third field reads 0.0."""
    config.RET_UNWRITTEN_VARS_NONE = False
    expected = [0.01, 0.01, 0.0]
    reader = FortranRecordReader('(3G10.2)')
    self.assertEqual(reader.read(' 1 1'), expected)
def config_test_4(self):
    """RET_UNWRITTEN_VARS_NONE = True: the unfilled third field reads None."""
    config.RET_UNWRITTEN_VARS_NONE = True
    expected = [0.0, 0.0, None]
    reader = FortranRecordReader('(3G10.2)')
    self.assertEqual(reader.read(' 0 0'), expected)
def _read_solar_section(stream, begin_marker, end_marker, reader):
    """Parse the records found between two marker lines of ``stream``.

    Lines are consumed from the current position up to and including the
    line equal to ``begin_marker``. Every following line is passed to
    ``reader.read`` until a line equal to ``end_marker`` is met. Markers are
    compared against the whole line, trailing newline included.

    Parameters
    ----------
    stream : file object opened in text mode
    begin_marker, end_marker : str
    reader : object with a ``read(line)`` method

    Returns
    -------
    list
        One parsed record per line of the section.

    Raises
    ------
    ValueError
        If the end of the file is reached before a marker is seen. Without
        this check the search would never terminate, because ``readline``
        keeps returning ``''`` at end of file.
    """
    for line in iter(stream.readline, ''):
        if line == begin_marker:
            break
    else:
        raise ValueError('%r not found in solar data file' % begin_marker)

    rows = []
    for line in iter(stream.readline, ''):
        if line == end_marker:
            return rows
        rows.append(reader.read(line))
    raise ValueError('%r not found in solar data file' % end_marker)


def _solar_date_key(row):
    """Pack the first three columns of ``row`` (yy, mm, dd) into yyyymmdd."""
    return (row[0].astype(int) * 10000 +
            row[1].astype(int) * 100 +
            row[2].astype(int))


def _check_solar_dates(table):
    """Build a ``DT.date`` from the first three columns of every row.

    The dates are not used further. The construction is kept from the
    original code, where it happened before the database was opened.
    NOTE(review): presumably this acts as a sanity check on the parsed
    dates -- confirm, or drop it.
    """
    for row in table:
        DT.date(row[0].astype(int), row[1].astype(int), row[2].astype(int))


def processsolar():
    """Load the records of ``/var/lib/odindata/sw.txt`` into SQLite.

    The OBSERVED section is parsed into one table; the DAILY_PREDICTED and
    MONTHLY_PREDICTED sections are parsed, in that order, into a second one.
    Both are written with ``insert or replace`` into the ``solardata`` and
    ``solardatapred`` tables of ``/var/lib/odindata/Solardata2.db``.

    Observed rows are keyed by their date as yyyymmdd. Predicted rows are
    keyed by the last observed key followed by their own yyyymmdd, and also
    store the last observed key in their second column.

    The input file is closed as soon as it has been read. The database
    connection is always closed, and changes are committed only when every
    row was inserted.

    Raises
    ------
    ValueError
        If a section marker is missing from the file, or if the OBSERVED
        section holds no rows (the prediction keys need the last observed
        date).
    """
    solarfile = '/var/lib/odindata/' + 'sw.txt'
    res = FortranRecordReader(
        'I4,I3,I3,I5,I3,8I3,I4,8I4,I4,F4.1,I2,I4,F6.1,I2,5F6.1')
    with open(solarfile, 'r') as solar:
        observed = _read_solar_section(
            solar, "BEGIN OBSERVED\n", "END OBSERVED\n", res)
        predicted = _read_solar_section(
            solar, "BEGIN DAILY_PREDICTED\n", "END DAILY_PREDICTED\n", res)
        predicted.extend(_read_solar_section(
            solar, "BEGIN MONTHLY_PREDICTED\n", "END MONTHLY_PREDICTED\n",
            res))

    # solardata 13=Kpsum 22=APavg 26=f10.7
    solardata = np.array(observed)
    solardatapred = np.array(predicted)
    if solardata.shape[0] == 0:
        raise ValueError('no observed rows found in ' + solarfile)
    _check_solar_dates(solardata)
    _check_solar_dates(solardatapred)

    dbfile = '/var/lib/odindata/' + 'Solardata2.db'
    db = sqlite.connect(dbfile)
    try:
        cur = db.cursor()
        # Expected table layout, for reference:
        # cur.execute('create table solardata (ID BIGINT, yy shortint, mm shortint,
        # dd shortint, BSRN shortint, ND shortint, Kp1 shortint, Kp2 shortint,
        # Kp3 shortint, Kp4 shortint, Kp5 shortint, Kp6 shortint, Kp7 short int,
        # Kp8 shortint, KpSum shortint, Ap1 shortint, Ap2 shortint,
        # Ap3 shortint, Ap4 shortint, Ap5 shortint, Ap6 shortint,
        # Ap7 shortint, Ap8 shortint, ApAvg shortint, Cp float, C9 shortint,
        # ISN Integer, AdjF10_7 float, Q shortint, AdjCtr81 float,
        # AdjLst81 float, ObsF10_7 float, ObsCtr81 float, ObsLst81 float )')
        # 34 placeholders: the key plus the 33 fields of one record
        instr = ('insert or replace into solardata values ('
                 '?, ?, ?, ?, ?, ?, ?, ?, ?, ?, '
                 '?, ?, ?, ?, ?, ?, ?, ?, ?, ?, '
                 '?, ?, ?, ?, ?, ?, ?, ?, ?, ?, '
                 '?, ?, ?, ? )')
        for row in solardata:
            ids = _solar_date_key(row)
            cur.execute(instr, np.r_[ids, row])

        # Expected table layout, for reference:
        # cur.execute('create table solardatapred (ID BIGINT, pred_date bigint,
        # yy shortint, mm shortint, dd shortint, BSRN shortint, ND shortint,
        # Kp1 shortint, Kp2 shortint, Kp3 shortint, Kp4 shortint, Kp5 shortint
        # Kp6 shortint, Kp7 short int, Kp8 shortint, KpSum shortint, Ap1 shortint,
        # Ap2 shortint, Ap3 shortint, Ap4 shortint, Ap5 shortint, Ap6 shortint,
        # Ap7 shortint, Ap8 shortint, ApAvg shortint, Cp float, C9 shortint,
        # ISN Integer, AdjF10_7 float, Q shortint, AdjCtr81 float,
        # AdjLst81 float, ObsF10_7 float, ObsCtr81 float, ObsLst81 float )')
        # 35 placeholders: the key, the last observed key and the 33 fields
        instr = ('insert or replace into solardatapred values ('
                 '?, ?, ?, ?, ?, ?, ?, ?, ?, ?, '
                 '?, ?, ?, ?, ?, ?, ?, ?, ?, ?, '
                 '?, ?, ?, ?, ?, ?, ?, ?, ?, ?, '
                 '?, ?, ?, ?, ? )')
        # Key of the last observed row; it prefixes every prediction key
        idp = ids
        for row in solardatapred:
            ids = idp * 100000000 + _solar_date_key(row)
            cur.execute(instr, np.r_[ids, idp, row])
        db.commit()
    finally:
        db.close()