def EDR(input_data):
    """Read an EDR text file into a DataFrame with one row per shot.

    Layout the reader relies on (0-based line numbers):

    * lines 0-1 and line 28 are ignored;
    * lines 2-27 are header lines, each handed to ``header_parser`` with
      ``':'`` as the delimiter and merged into a single dict;
    * line 29 is the first data line; its whitespace-separated field count
      determines how many ``shotN`` columns are named.

    The data section (everything after the first 29 lines) is loaded with
    pandas, transposed so each ``shotN`` label becomes a row, and every
    header entry is added as a column whose value repeats on each row.

    Parameters
    ----------
    input_data : str
        Path of the file to read.

    Returns
    -------
    pandas.DataFrame
        Transposed data with the header metadata appended as columns.

    Raises
    ------
    ValueError
        If the file ends before line 29, so no data line is available to
        infer the number of shots.
    """
    n_preamble = 2     # leading lines that carry nothing we use
    skipped_line = 28  # line between the header and the data; ignored
    first_data = 29    # first data line; also the number of rows to skip

    header = {}
    shots = None
    with open(input_data, 'r') as f:
        # Iterate lazily: nothing beyond the first data line is inspected
        # here, so there is no need to load the whole file into memory.
        for i, row in enumerate(f):
            if i < n_preamble or i == skipped_line:
                continue
            if i < skipped_line:
                # NOTE(review): header_parser is defined elsewhere; it is
                # assumed to return something dict.update() accepts.
                header.update(header_parser(row, ':'))
            else:
                # i == first_data: count the fields to name the columns.
                n_shots = len(row.split())
                shots = ['shot' + str(n) for n in range(1, n_shots + 1)]
                break

    if shots is None:
        # Previously this surfaced as an UnboundLocalError on `shots`.
        raise ValueError(
            'Expected at least {} lines in {!r}; no data line found.'.format(
                first_data + 1, input_data))

    df = pd.read_csv(input_data, sep=' ', skiprows=first_data, names=shots)
    df = df.transpose()

    # Insert the header metadata as columns (same value on every row).
    for label, data in header.items():
        df[label] = data
    return df
def CCS(input_data):
    """Read a CCS comma-separated file into a DataFrame with one row per shot.

    The first 14 lines of the file are treated as a header; the table that
    follows is loaded with its first column as the index, transposed, and
    its columns relabelled as ``('wvl', <value rounded to 5 decimals>)``
    tuples in a two-level column index.

    Extra columns are then added, each holding one value repeated on every
    row:

    * ``sclock``   - characters 4-12 of the file name, converted to a number;
    * ``seqid``    - characters 25-33 of the file name, upper-cased;
    * ``Pversion`` - characters 34-35 of the file name;
    * one column per header entry (the text before the first comma of each
      header line, parsed by ``header_parser`` with ``'='``). A ``_float``
      substring is removed from the label, and a label equal to ``dark``
      is stored as ``darkspec``.

    Storing the metadata on every row is redundant but makes it trivial to
    concatenate frames from several files.

    Parameters
    ----------
    input_data : str
        Path of the file to read; its base name must follow the fixed-width
        naming layout described above.

    Returns
    -------
    pandas.DataFrame
        One row per original data column, with the former index exposed as
        a regular ``shotnum`` column.
    """
    n_header_rows = 14  # header lines preceding the column-name row

    # DataFrame.from_csv was removed in pandas 1.0; read_csv with index_col=0
    # is the replacement. from_csv also defaulted to parse_dates=True, which
    # is not needed: every index label is passed through float() below.
    df = pd.read_csv(input_data, header=n_header_rows, index_col=0)
    # Strip whitespace from column names.
    df.rename(columns=lambda x: x.strip(), inplace=True)
    df = df.transpose()

    df.columns = pd.MultiIndex.from_tuples(
        [('wvl', round(float(x), 5)) for x in df.columns.tolist()])

    # Extract info from the file name (fixed character positions).
    fname = os.path.basename(input_data)
    df['sclock'] = fname[4:13]
    df['sclock'] = pd.to_numeric(df['sclock'])
    df['seqid'] = fname[25:34].upper()
    df['Pversion'] = fname[34:36]

    # Read the file header; only the first n_header_rows lines matter, so
    # stop there instead of loading the whole file.
    header = {}
    with open(input_data, 'r') as f:
        for i, row in enumerate(f):
            if i >= n_header_rows:
                break
            # Keep only the text before the first comma on the line.
            # NOTE(review): header_parser is defined elsewhere; it is
            # assumed to return something dict.update() accepts.
            header.update(header_parser(row.split(',')[0], '='))

    for label, data in header.items():
        if '_float' in label:
            label = label.replace('_float', '')
        # NOTE(review): reason for the dark -> darkspec rename is not visible
        # here; presumably it avoids a clash with another column. Confirm.
        if label == 'dark':
            label = 'darkspec'
        df[label] = data

    df.index.rename('shotnum', inplace=True)
    df.reset_index(level=0, inplace=True)
    return df