def main(src, ovr=0):
    """Convert every pickled ``*.pgz`` file under *src* to an ``*.rds`` file.

    src: directory containing ``*.pgz`` files.
    ovr: when falsy, files whose ``.rds`` output already exists are skipped.
    """
    from glob import glob as gg
    for fi in gg(pt.join(src, '*.pgz')):
        fo = pt.splitext(fi)[0] + '.rds'
        # skip existing output unless an overwrite was requested
        if pt.exists(fo) and not ovr:
            print('exists:', fo)
        else:
            __enc2rds__(fi, fo)
def main(datenpfad):
    """For every file matching *datenpfad*: print its path, then its
    lower-cased, punctuation-free content; finish with a footer line."""
    for pfad in gg(datenpfad):
        print(pfad)
        # read -> lowercase -> strip punctuation, then show the result
        bereinigt = remove_punctuation(make_lowercase(read_textfile(pfad)))
        print(bereinigt)
    print("Fertig.")
def main(textfolder):
    """Collect per-file text statistics into a dict keyed by the file's
    core name (basename up to the first dot), then print the dict."""
    allresults = {}
    for path in gg(textfolder):
        stem = os.path.basename(path).split(".")[0]
        cleaned = clean_text(read_textfile(path))
        allresults[stem] = get_textstats(cleaned)
    print(allresults)
def __sample_wm__(wrk): """ given a list of center vertices {cvs}, and hemispheres {hms}, pick regions from WM surface across subject in {src} """ ## fetch working specifications src = wrk['src'] # source directory with subjects dst = wrk['dst'] # target filenames hms = wrk['hms'] # hemispheres cvs = wrk['cvs'] # center vertices nbs = wrk['nbs'] # neighbor table for each vertex sz = wrk['sz'] # region size ## lists of output path, vertex indices, and connetion matrices lfo, lvi, lcn = [], [], [] for hm, nb, cv in izip(hms, nbs, cvs): vi, cn = __neighbor__(nb, cv, sz) lvi.append(vi) lcn.append(cn) lfo.append(pt.join(dst, '{}{:05X}'.format(hm, cv))) ## lists of surfaces to be sampled, and subjects lsf, lsb = [[] for i in xrange(len(lvi))], [] ## iterate all subjects print 'xt: sample ', len(lvi), 'WM areas from ', src, ':' for fn in gg(pt.join(src, '*')): if not fn.endswith('npz'): continue lsb.append(pt.basename(fn).split('.')[0]) print lsb[-1] sys.stdout.flush() wm = np.load(fn) ## sample surfaces for subject {sb} ## si: surface index, hm: hemisphere, vi: vertex indices for si, hm, vi in izip(xrange(len(lvi)), hms, lvi): lsf[si].append(wm[hm][vi]) if not len(lsb) < 5: break ## write the samples to file in numpy format. print 'xt: write WM samples to ', dst, ':' sys.stdout.flush() sbj = np.array(lsb) for sf, vi, cn, fo in izip(lsf, lvi, lcn, lfo): np.savez_compressed(fo + '.npz', sbj=sbj, vtx=np.vstack(sf), cmx=cn) vi = ['{:05X}'.format(i) for i in vi] __save_rds__(sf, lsb, vi, cn, fo + '.rds') print fo + ": created" sys.stdout.flush() print 'xt: success' sys.stdout.flush()
def get_pk(src, idx=0):
    """Load one object from a pickle file.

    src: a pickle file path, or a directory -- in which case the *idx*-th
         entry of ``glob(src/*)`` is loaded (glob order is arbitrary).
    idx: index into the glob result when *src* is a directory.
    Returns the unpickled object.

    Fixed: used Python-2-only ``cPickle`` and a print statement; stdlib
    ``pickle`` reads the same files on both Python versions.
    """
    import pickle
    if pt.isdir(src):
        fn = gg(pt.join(src, "*"))[idx]
    else:
        fn = src
    # NOTE(review): unpickling untrusted files can execute arbitrary code
    with open(fn, 'rb') as fp:
        obj = pickle.load(fp)
    print(fn + ": fetched")
    return obj
def apec(src, dst, sn, hm, ovr=0): """ given a directory of subjects, extract anatomical regions src: subject source dst: where to put extracted regions sn: region serial number, 2~35 hm: hemesphere, lh or rh """ ## output path fo = pt.join(dst, '{}{:02d}.npz'.format(hm, sn)) if pt.exists(fo) and not ovr: print 'exists:', fo return ## fetch anatomical peceration table at = np.load('apec.npz') ## vertex indices vi = (at[hm]==sn).nonzero()[0] ## anatomy region id and name id, nm = at['tb'][sn]['id'], at['tb'][sn]['nm'] ## surface, and subject index vt, sb = [], [] ## iterate all subjects print 'xt: extract', hm, at['tb'][sn]['nm'], 'from ', src, ':' for fn in gg(pt.join(src, '*.npz')): sb.append(pt.basename(fn).split('.')[0]) print sb[-1] sys.stdout.flush() wm = np.load(fn) ## extract surfaces for subject {sb} ## hm: hemisphere, vi: vertex indices vt.append(wm[hm][vi]) wm.close() ## write the samples to file in numpy format. print 'xt: write surface to ', dst sys.stdout.flush() vt=np.vstack(vt) sb=np.array(sb) np.savez_compressed( fo, sb=sb, vt=vt, vi=vi, hm=hm, sn=sn, id=id, nm=nm) print 'xt: success' sys.stdout.flush()
def main(textfolder):
    """Read, clean, and compute statistics for every matching text file.

    The statistics are discarded; presumably get_textstats has side
    effects or this is scaffolding -- TODO confirm.
    """
    for path in gg(textfolder):
        textstats = get_textstats(clean_text(read_textfile(path)))
def itr_fn(src="", fmt='n', flt=None, drop=True):
    """Filename iterator.

    src:  file pattern or directory (a directory is expanded to '<dir>/*').
    fmt:  string of single-letter attribute codes; one attribute per code,
          yielded in order:
            i: running index of yielded files
            n: file name,      N: absolute file name
            c: core name,      C: absolute core name
            b: base name,      B: basename of the absolute path
            d: directory,      D: absolute directory
            e: extension(s),   E: last extension
          unknown codes are silently skipped.
    flt:  optional predicate on the raw filename; None accepts all files.
    drop: drop the list structure if only one attribute is returned.

    Fixed: compared *flt* to None with ``==`` instead of ``is`` (PEP 8).
    """
    src = resolve_path(src)
    if pt.isdir(src):
        src = pt.join(src, "*")
    if flt is None:
        flt = lambda w: True
    i = 0
    for fn in gg(src):
        if not flt(fn):
            continue
        rt = []
        for c in fmt:
            if c == 'i':
                r = i
            elif c == 'n':
                r = fn
            elif c == 'N':                  # absolute filename
                r = pt.abspath(fn)
            elif c == 'C':                  # absolute corename
                r = pt.abspath(fn).split('.')[0]
            elif c == 'c':                  # relative corename
                r = pt.basename(fn).split('.')[0]
            elif c == 'B':                  # basename.extension
                r = pt.basename(pt.abspath(fn))
            elif c == 'b':                  # basename
                r = pt.basename(fn)
            elif c == 'D':                  # absolute directory
                r = pt.dirname(pt.abspath(fn))
            elif c == 'd':                  # relative directory
                r = pt.dirname(fn)
            elif c == 'e':                  # extension(s)
                r = pt.basename(fn).split('.')[1:]
                if len(r) == 1:
                    r = r[0]
                if len(r) == 0:
                    r = None
            elif c == 'E':                  # last extension only
                r = pt.basename(fn).split('.')[1:]
                if len(r) > 0:
                    r = r[-1]
                if len(r) == 0:
                    r = None
            else:
                continue
            rt.append(r)
        i += 1
        if drop and len(rt) == 1:
            yield rt[0]
        else:
            yield rt
def num_pk(src):
    """Return the number of files matching the glob pattern *src*."""
    matches = gg(src)
    return len(matches)
def glob(pathname, recursive=False):
    """Wrapper around ``glob.glob`` that honours the *recursive* flag.

    pathname:  glob pattern, possibly containing '**'.
    recursive: when True, '**' matches any number of directory levels.

    Fixed: the previous version accepted *recursive* but never passed it
    through, so '**' patterns were never expanded recursively.
    """
    return gg(pathname, recursive=recursive)
# clear the current matplotlib figure before any plotting
plt.clf()

if __name__ == "__main__":
    # positional command-line arguments, all required, in this order
    niftipath = str(sys.argv[1])         # NIfTI input path, or the literal 'data'
    mnipath = str(sys.argv[2])           # presumably an MNI template path -- TODO confirm
    ortho = str(sys.argv[3])             # orientation letters; one plot run per letter
    nRows = int(sys.argv[4])             # rows in the output figure
    nCuts = int(sys.argv[5])             # number of slice cuts
    showLRannot = bool(int(sys.argv[6])) # 0/1 flag: left/right annotation
    figLayout = str(sys.argv[7])
    threshpos = int(sys.argv[8])         # positive threshold
    threshneg = int(sys.argv[9])         # negative threshold
    findOptimalCut = bool(int(sys.argv[10]))  # 0/1 flag
    imageType = str(sys.argv[11])

    # Go through all the files in the data folder if requested
    if niftipath == 'data':
        fileList = gg('data/*.nii*')
        for fpath in fileList:
            for o in list(ortho):
                plotGlassbrainSlices(
                    fpath, mnipath, o, nRows, nCuts, threshpos, threshneg,
                    figLayout, showLRannot, findOptimalCut, imageType)
    else:
        # single-file mode: same plotting call, once per orientation letter
        for o in list(ortho):
            plotGlassbrainSlices(
                niftipath, mnipath, o, nRows, nCuts, threshpos, threshneg,
                figLayout, showLRannot, findOptimalCut, imageType)
def firstGlob(*args, ext=None):
    """Return the first path matching the pattern built from *args*, or None.

    args: path components handed to getPath.
    ext:  optional extension handed to getPath.

    Fixed: recursion was enabled only when '**' was an exact element of
    *args* (tuple membership); it is now enabled whenever the assembled
    pattern contains '**', which is what glob's recursive flag keys on.
    """
    path = getPath(*args, ext=ext)
    matches = gg(path, recursive=True) if "**" in path else gg(path)
    return matches[0] if matches else None
def recursiveGlob(*args, ext=None):
    """Sorted recursive glob of the pattern built from *args*.

    Returns the sorted list of matches, or None when nothing matches.
    """
    pattern = getPath(*args, ext=ext)
    matches = sorted(gg(pattern, recursive=True))
    return matches if matches else None
def glob(*args, ext=None):
    """Sorted glob of the pattern built from *args*.

    Returns the sorted list of matches, or None when nothing matches.
    """
    hits = sorted(gg(getPath(*args, ext=ext)))
    return hits or None
# a = gg(pathname='Test/*.json') # # print(a) # # df = pd.DataFrame() # # print(df) # # for f in a: # tmp = pd.read_json(f, orient='index') # df = pd.concat([df, tmp], axis=0, ignore_index=True) # # print(df) # with open('Test/'+'test1.json') as g: # read_data = pd.read_json(g, orient='index') # print(read_data) # Use glob to make a list of json files in the TikTok_Test directory b = gg(pathname='TikTok_Test/*.json') print(b[0]) import json with open(b[0], "r") as h: data = json.load(h) print(data)
def main(src, dst):
    """Apply work_3 to every '*.pgz' file in *src*, writing the output
    under *dst* with the same basename."""
    for src_file in gg(pt.join(src, '*.pgz')):
        out_file = pt.join(dst, pt.basename(src_file))
        work_3(src_file, out_file)
# `text` is defined in an earlier section of the script -- TODO confirm
print(text[0:50])
print(text.lower()[0:50])
print(type(text))
print(len(text))

# =====================

import os
from os.path import join
from os.path import join as oj

# read a single, hard-coded text file
textdatei = oj("data", "Kraus.txt")
with open(textdatei, "r", encoding="utf8") as infile:
    text = infile.read()

print(text[0:50])
print(text.lower()[0:50])
print(type(text))
print(len(text))

# ===============================

import glob
from glob import glob as gg

# preview the first 50 characters of every .txt file in data/
for file in gg(join("data", "*.txt")):
    #print(file)
    with open(file, "r", encoding="utf8") as infile:
        text = infile.read()
    print(text[0:50])
def __init__(self, type, envi_hdr="", envi_file="", ascii_spectra="",
             meta_csv="", ascii_bands="", directory_path="", meta_tab=""):
    """
    Loads a spectral library from common spectral library formats:
    CRISM (ENVI format), the ASU spectral library (ascii spectra + csv
    meta), "kim" (ascii spectra + tab-separated meta), and USGS
    (directory path + spectral-bands ascii path).

    Stores the important spectral library features (spectra, spectral
    bands, names of spectra, and one-hot labels) in a standard format.

    Note: it is assumed that the first word of the "name" is the label
    of the mineral, which is generally true for all libraries with some
    exceptions. For these exceptions, the data should be relabelled.

    :param type: "asu", "kim", "crism", or "usgs"
                 (NOTE(review): shadows the `type` builtin; kept for
                 keyword-API compatibility)
    :param envi_hdr: path to ENVI header file (crism only)
    :param envi_file: path to ENVI file (crism only)
    :param ascii_spectra: path to ascii spectra file (asu, kim)
    :param meta_csv: path to meta csv file (asu)
    :param ascii_bands: path to spectral bands ascii file (usgs)
    :param directory_path: path to directory of spectra files (usgs)
    :param meta_tab: path to tab-separated meta file (kim)
    """
    # assign object variables per asu spec lib type
    if type == "asu":
        self.source = "asu"
        self.spectra = np.loadtxt(ascii_spectra)
        self.spectra = np.delete(
            self.spectra, 0, 1)  # delete first spectra column (wavenumbers)
        self.bands = np.loadtxt(ascii_spectra, usecols=0)
        self.meta = pd.read_csv(meta_csv)
        self.names = self.meta.sample_name.tolist()
        self.category = self.meta.category.tolist()

    if type == "kim":
        self.source = "kim"
        self.spectra = np.loadtxt(ascii_spectra)
        self.spectra = np.delete(
            self.spectra, 0, 1)  # delete first spectra column (wavenumbers)
        self.bands = np.loadtxt(ascii_spectra, usecols=0)
        with open(meta_tab) as f:
            self.names = list(csv.reader(f, delimiter='\t'))
        # NOTE(review): here each name is a list of csv fields, so the
        # `names.partition` call at the bottom would fail for "kim" --
        # confirm this branch is exercised.
        self.category = self.names

    # assign object variables per crism spec lib type
    if type == "crism":
        self.source = "crism"
        self.envi_file = envi.open(envi_hdr, envi_file)
        self.spectra = self.envi_file.spectra.transpose()
        self.bands = self.envi_file.bands.centers
        self.names = self.envi_file.names
        self.category = self.names

    if type == "usgs":
        self.source = "usgs"
        self.bands = np.loadtxt(ascii_bands, skiprows=1)
        # iterate through all txt files in directory path
        first = True
        for f in gg(directory_path):
            temp_spectra = np.loadtxt(f, skiprows=1)
            # each file holds one spectrum; reshape to a column vector
            temp_spectra = temp_spectra.reshape((len(temp_spectra), 1))
            # first line of the file is meta data; third token is the name
            # NOTE(review): file handle is never closed here
            temp_meta = open(f, "r").readlines()[0].split()
            temp_name = temp_meta[2]
            if first:
                self.spectra = temp_spectra
                self.names = [temp_name]
                first = False
            else:
                # append each new spectrum as a new column
                self.spectra = np.append(self.spectra, temp_spectra, axis=1)
                self.names.append(temp_name)
        self.text_labels = self.names
        self.category = self.names

    # assign general object variables
    # NOTE(review): overwrites the usgs text_labels assigned above --
    # confirm that is intended
    self.text_labels = [names.partition(" ")[0] for names in self.names]
    self.index = range(len(self.names))
    self.src_index = range(len(self.names))
    encoder = LabelBinarizer()
    self.onehot_labels = encoder.fit_transform(self.text_labels)
    self.onehot_category = encoder.fit_transform(self.category)