def create_system(series):
    """
    Build one pif.System per entry of a Series-like object.

    Every label in ``series.index`` is passed to ``split_name_and_units``;
    the results are transposed and unpacked into a sequence of names and a
    sequence of units. Each name is then used both as the first positional
    argument of ``pif.System`` and, after being run through
    ``url_friendly(lambda x: x)``, as its ``uid``.

    :param series: Object exposing ``.index`` (the labels to convert) and
        ``.values``.
    :return: List of pif.System objects, one per zipped
        (name, unit, value) triple.
    """
    # presumably split_name_and_units returns a (name, units) pair per
    # label -- the two-way unpack below relies on that; TODO confirm.
    parsed_labels = [split_name_and_units(label) for label in series.index]
    names, units = np.transpose(parsed_labels)

    systems = []
    # The units and values are walked in lockstep with the names but are
    # not used when building the System objects.
    for name, _unit, _value in zip(names, units, series.values):
        make_uid = url_friendly(lambda x: x)
        systems.append(pif.System(name, uid=make_uid(name)))
    return systems
def to_system(row):
    """
    Convert one table row into a pif.System record.

    The 'Sample Name' entry supplies the system's uid (URL-friendly form)
    and name, 'Parent Sample Name' supplies the uid of the sub-system, and
    'Contact' selects the contact person (Lockheed Martin when the value is
    not one of 'Wolf', 'CMU', 'Mines' or 'LM'). Columns whose name starts
    with 'FILE' are collected into a single "Files" property; every other
    non-null column becomes its own property.

    :param row: Mapping-like row (e.g. a pandas Series) that supports
        ``row[key]`` and ``row.items()`` and contains the keys
        'Sample Name', 'Parent Sample Name' and 'Contact'.
    :return: The populated pif.System.
    """
    def url_friendly(name):
        """Replace every non-word character in *name* with an underscore."""
        return re.sub(r"\W", "_", name)

    def wolf_contact():
        return pif.Person(name='JP H. Paul',
                          email='*****@*****.**',
                          tags='Lincoln Electric (Wolf Robotics)')

    def cmu_contact():
        return pif.Person(name='Anthony Rollett',
                          email='*****@*****.**',
                          tags='Carnegie Mellon University')

    def mines_contact():
        return pif.Person(name='Branden Kappes',
                          email='*****@*****.**',
                          tags='Colorado School of Mines')

    def lmco_contact():
        return pif.Person(name='Edward A. Pierson',
                          email='*****@*****.**',
                          tags='Lockheed Martin Corporation')

    def add_property(pty):
        """Append *pty* to the system, creating the list on first use."""
        try:
            system.properties.append(pty)
        except AttributeError:
            # Appending to a not-yet-created property list fails; start
            # the list with this property instead.
            system.properties = [pty]

    # Every PIF record is a System object.
    system = pif.System()
    # Create a unique identifier. This must be URL friendly.
    system.uid = url_friendly(str(row['Sample Name']))
    # Name the PIF.
    system.names = str(row['Sample Name'])
    # Record the parent sample.
    # NOTE(review): the sub-system takes its uid from 'Parent Sample Name'
    # but its names from 'Sample Name' -- possibly a copy-paste slip; left
    # unchanged, confirm the intent.
    system.sub_systems = pif.System(
        uid=url_friendly(str(row['Parent Sample Name'])),
        names=str(row['Sample Name']))
    # Set the contact information; defaults to LMCO.
    system.contacts = {
        'Wolf': wolf_contact,
        'CMU': cmu_contact,
        'Mines': mines_contact,
        'LM': lmco_contact
    }.get(row['Contact'], lmco_contact)()

    # Columns that must not be turned into ordinary properties.
    special = {'Sample Name', 'Parent Sample Name', 'Contact'}

    # First pass: gather file references from the FILE columns.
    filelist = None
    for column, value in row.items():
        # Special columns have already been handled; empty cells carry
        # nothing worth recording.
        if column in special or is_null(value):
            continue
        if not column.startswith('FILE'):
            continue
        # Mark the FILE column as special so the second pass skips it.
        special.add(column)
        # Everything after the first colon is the tag -- "FILE" itself is
        # not needed.
        tag = ':'.join(column.split(':')[1:]).strip()
        # The cell holds a filename or the string form of a list of
        # filenames, e.g. "[file1.png, file2.png]".
        for entry in str(value).strip("[]").split(","):
            # Drop the whitespace that follows each comma and any quotes
            # left over from a stringified Python list; skip blanks.
            path = entry.strip().strip("'\"")
            if not path:
                continue
            reference = pif.FileReference(relative_path=path, tags=tag)
            if filelist is None:
                filelist = [reference]
            else:
                filelist.append(reference)

    # A single property holds all the files (files=None when none found).
    add_property(pif.Property(name="Files", files=filelist))

    # Scalars may only contain lists, dicts, strings or numbers, so
    # timestamps are stringified and tuples become lists; any other value
    # is rebuilt through its own type.
    coercions = {pd.Timestamp: str, tuple: list}
    # Units are stored, by convention, in trailing parentheses, e.g.
    # "laser speed (mm/s)"; capture the last parenthesised term.
    units_pattern = re.compile(r'.*\(([^)]+)\)\s*$')

    # Second pass: everything else is a property.
    for column, value in row.items():
        if column in special or is_null(value):
            continue
        value = coercions.get(type(value), type(value))(value)
        try:
            pty = pif.Property(name=column, scalars=value)
        except Exception:
            # Fall back to the string form when the value is rejected.
            pty = pif.Property(name=column, scalars=str(value))
        found = units_pattern.search(column)
        if found is not None:
            pty.units = found.group(1)
        add_property(pty)

    return system
def to_pif(table, name=None, uid=None, subsystems={}, properties={}, preparation={}, default=create_property):
    """
    Creates a list of Citrination PIF objects from a table.

    :param table, Container: The DataFrame-like table. Each row becomes a
        new pif.System object.
    :param name: Callable that takes a pd.Series and generates a name for
        the PIF. If None, the table is passed through ``set_index(table, 0)``
        and ``get_index`` is used as the name callable.
    :param uid: None or a callable that takes a pd.Series and generates
        the UID for the PIF. If None, ``url_friendly(name)`` is used.
    :param subsystems, dict: Dictionary/map between the name of the field
        and a callable that accepts a pd.Series and produces pif.System
        objects stored as subsystems of the pif.System.
    :param properties, dict: Dictionary/map between the name of the field
        and a callable that accepts a pd.Series and produces a list of
        pif.Property objects stored in the pif.System.
    :param preparation, dict: Dictionary/map between the name of the field
        and a callable that accepts a pd.Series and produces
        pif.ProcessStep objects stored in the pif.System.
    :param default, unary function: Default operation for columns whose
        name is not a key of subsystems, properties, or preparation. The
        first element of its result is routed by type.
    :return: List of pif.System objects, one per row.
    :raises ValueError: If ``default`` produces something other than a
        pif.System, pif.ProcessStep or pif.Property.
    """
    container = table
    if name is None:
        container = set_index(table, 0)
        name = get_index
    if uid is None:
        uid = url_friendly(name)

    # Columns not claimed by any explicit mapping go through `default`.
    # They are kept in column order (duplicates removed) so the resulting
    # PIFs are identical from run to run; iterating a set of strings would
    # make the order depend on hash randomisation.
    # NOTE(review): the keys come from `table.columns`, not `container`;
    # if set_index removes column 0 from the columns, the lookup in the
    # default loop below would miss it -- confirm.
    claimed = (set(subsystems.keys())
               | set(preparation.keys())
               | set(properties.keys()))
    default_keys = [key for key in dict.fromkeys(table.columns)
                    if key not in claimed]

    # Each mapping is wrapped by match_regex; the wrapped object is called
    # with a row and returns a {column: callable} dictionary.
    subsystem_map = match_regex(subsystems)
    preparation_map = match_regex(preparation)
    property_map = match_regex(properties)

    # Create the PIFs and store the results.
    results = []
    for _, series in container.iterrows():
        system = pif.System(name=name(series), uid=uid(series))
        system.sub_systems = [
            item
            for key, func in subsystem_map(series).items()
            for item in func(series[[key]])
        ]
        system.properties = [
            item
            for key, func in property_map(series).items()
            for item in func(series[[key]])
        ]
        system.preparation = [
            item
            for key, func in preparation_map(series).items()
            for item in func(series[[key]])
        ]
        for key in default_keys:
            # Only the first object returned by `default` is used.
            result = default(series[[key]])[0]
            if isinstance(result, pif.System):
                system.sub_systems.append(result)
            elif isinstance(result, pif.ProcessStep):
                system.preparation.append(result)
            elif isinstance(result, pif.Property):
                system.properties.append(result)
            else:
                raise ValueError(
                    f"The default in 'to_pif' produced an "
                    f"unrecognized object of type "
                    f"{type(result).__name__}.")
        results.append(system)
    return results
def converter(files=[], **keywds):
    """
    Summary
    =======

    Converter to calculate strain data from Aramis CSV output.

    Input
    =====
    :files, str or list: One or list of CSV-formatted files. The first two
        lines of each file are skipped; the third line holds the column
        names, whose second entry must look like
        ``DESCRIPTION (REDUCTION): LABEL [UNITS]``.

    Options
    -------
    :timestep, float: Interval (seconds) with which strain data is
        collected. When given, a 'time' property (first column multiplied
        by the timestep) is added for each file.

    Output
    ======
    A single pif.System named 'Aramis' whose properties are the properties
    extracted from every file, in file order, and whose tags are the file
    names.

    Raises
    ======
    ValueError if the second column name of a file does not match the
    expected label format.
    """
    # Ensure *files* is a list. If a single file is passed, convert it
    # into a list.
    if isinstance(files, str):
        files = [files]

    # `label` format: DESCRIPTION (REDUCTION): LABEL [UNITS]
    label_pattern = re.compile(r'[^(]+\(([^)]+)\):\s*([^[]+)\[([^]]+)\]')

    # Process filenames, accumulating the properties of every file.
    results = []
    for fname in files:
        with open(fname) as ifs:
            # The encoding line and the "Statistics export" line are
            # currently discarded.
            ifs.readline()
            ifs.readline()
            # Refactor column names from Aramis.
            names = [
                entry.strip().lower()
                for entry in ifs.readline().split(',')
            ]
            # names[0] (strain stage): no change.
            # names[1] (strain): reduce to LABEL and remember the UNITS.
            raw_label = names[1]
            found = label_pattern.search(raw_label)
            if found is None:
                # A failed search returns None rather than raising, so
                # the mismatch has to be detected explicitly.
                msg = '"{}" in {} is not a valid label format.'.format(
                    raw_label, fname)
                raise ValueError(msg)
            reduction, label, units = found.groups()

            # Restructure names and units.
            names = [names[0], label.strip()]
            units = ['None', units]

            # Read in the data, converting every column with ensure_float.
            converters = dict.fromkeys(range(len(names)), ensure_float)
            data = pd.read_csv(ifs, names=names, converters=converters)
        data.dropna(inplace=True)

        # List of properties extracted from this file.
        file_results = [
            pif.Property(name=name,
                         scalars=list(data[name]),
                         units=unit,
                         files=pif.FileReference(relative_path=fname),
                         methods=pif.Method(
                             name='digital image correlation (DIC)',
                             instruments=pif.Instrument(name='DIC',
                                                        producer='Aramis')),
                         data_type='EXPERIMENTAL',
                         tag=reduction)
            for name, unit in zip(names, units)
        ]

        # Strain (second property) transforms.
        strain = file_results[1]
        # Standardize the naming convention, keeping the original label
        # as an extra tag.
        strain_type = strain.name
        strain.name = 'strain'
        try:
            strain.tag.append(strain_type)
        except AttributeError:
            # The tag is not a list yet; turn it into one.
            strain.tag = [strain.tag, strain_type]
        # Is a transform from % strain necessary?
        if strain.units == '%':
            strain.scalars = list(np.divide(strain.scalars, 100.))
            strain.units = 'mm/mm'

        # Determine the time at which each measurement was taken.
        if 'timestep' in keywds:
            # Ensure timestep is a float.
            timestep = float(keywds['timestep'])
            elapsed = list(data[names[0]] * timestep)
            replace_if_present_else_append(
                file_results,
                pif.Property(name='time',
                             scalars=elapsed,
                             units='s',
                             files=pif.FileReference(relative_path=fname),
                             methods=pif.Method(
                                 name='digital image correlation (DIC)',
                                 instruments=pif.Instrument(
                                     name='DIC', producer='Aramis')),
                             data_type='EXPERIMENTAL',
                             tag=reduction),
                cmp=lambda A, B: A.name.lower() == B.name.lower())

        # Keep this file's properties instead of overwriting those of the
        # files processed before it.
        results.extend(file_results)

    # Wrap in system object.
    return pif.System(names='Aramis', properties=results, tags=files)