Ejemplo n.º 1
0
def main(src, ovr = 0):
    from glob import glob as gg
    for fi in gg(pt.join(src, '*.pgz')):
        fo = pt.splitext(fi)[0] + '.rds'
        if pt.exists(fo) and not ovr:
            print 'exists:', fo
        else:
            __enc2rds__(fi, fo)
Ejemplo n.º 2
0
def main(datenpfad):
    """Print every path matching the glob pattern, followed by its cleaned text.

    Each matched file is read with read_textfile, lower-cased with
    make_lowercase and passed through remove_punctuation; the result is
    printed. "Fertig." is printed once all files are done.
    """
    for textdatei in gg(datenpfad):
        print(textdatei)
        cleantext = remove_punctuation(
            make_lowercase(read_textfile(textdatei)))
        print(cleantext)
    print("Fertig.")
Ejemplo n.º 3
0
def main(textfolder):
    """Collect text statistics per file and print them as one dict.

    textfolder is used as a glob pattern. Results are keyed by the part
    of each file's base name before its first "."; a later file with the
    same key replaces the earlier entry.
    """
    allresults = {}
    for textfile in gg(textfolder):
        # Everything before the first dot of the base name is the key
        filename, _, _ = os.path.basename(textfile).partition(".")
        cleaned = clean_text(read_textfile(textfile))
        allresults[filename] = get_textstats(cleaned)
    print(allresults)
Ejemplo n.º 4
0
def __sample_wm__(wrk):
    """
    given a list of center vertices {cvs}, and hemispheres {hms},
    pick regions from WM surface across subject in {src}
    """
    ## fetch working specifications
    src = wrk['src']     # source directory with subjects
    dst = wrk['dst']     # target filenames
    hms = wrk['hms']     # hemispheres
    cvs = wrk['cvs']     # center vertices
    nbs = wrk['nbs']     # neighbor table for each vertex
    sz = wrk['sz']       # region size

    ## lists of output path, vertex indices, and connetion matrices
    lfo, lvi, lcn = [], [], []
    for hm, nb, cv in izip(hms, nbs, cvs):
        vi, cn = __neighbor__(nb, cv, sz)
        lvi.append(vi)
        lcn.append(cn)
        lfo.append(pt.join(dst, '{}{:05X}'.format(hm, cv)))
        
    ## lists of surfaces to be sampled, and subjects
    lsf, lsb = [[] for i in xrange(len(lvi))], []

    ## iterate all subjects
    print 'xt: sample ', len(lvi), 'WM areas from ', src, ':'

    for fn in gg(pt.join(src, '*')):
        if not fn.endswith('npz'):
            continue
        lsb.append(pt.basename(fn).split('.')[0])
        print lsb[-1]
        sys.stdout.flush()
        wm = np.load(fn)

        ## sample surfaces for subject {sb}
        ## si: surface index, hm: hemisphere, vi: vertex indices
        for si, hm, vi in izip(xrange(len(lvi)), hms, lvi):
            lsf[si].append(wm[hm][vi])

        if not len(lsb) < 5:
            break

    ## write the samples to file in numpy format.
    print 'xt: write WM samples to ', dst, ':'
    sys.stdout.flush()
    sbj = np.array(lsb)
    for sf, vi, cn, fo in izip(lsf, lvi, lcn, lfo):
        np.savez_compressed(fo + '.npz', sbj=sbj, vtx=np.vstack(sf), cmx=cn)
        vi = ['{:05X}'.format(i) for i in vi]
        __save_rds__(sf, lsb, vi, cn, fo + '.rds')
        print fo + ": created"
        sys.stdout.flush()

    print 'xt: success'
    sys.stdout.flush()
Ejemplo n.º 5
0
def get_pk(src, idx = 0):
    """ get data from pickle """
    if pt.isdir(src):
        fn = gg(pt.join(src, "*"))[idx]
    else:
        fn = src

    with open(fn, 'rb') as fp:
        obj = cPickle.load(fp)

    print fn + ": fetched"
    return obj
Ejemplo n.º 6
0
def apec(src, dst, sn, hm, ovr=0):
    """
    given a directory of subjects, extract anatomical regions
    src: subject source
    dst: where to put extracted regions
    sn: region serial number, 2~35
    hm: hemesphere, lh or rh
    """
    ## output path
    fo = pt.join(dst, '{}{:02d}.npz'.format(hm, sn))
    if pt.exists(fo) and not ovr:
        print 'exists:', fo
        return
    
    ## fetch anatomical peceration table
    at = np.load('apec.npz')

    ## vertex indices
    vi = (at[hm]==sn).nonzero()[0]

    ## anatomy region id and name
    id, nm = at['tb'][sn]['id'], at['tb'][sn]['nm']

    ## surface, and subject index
    vt, sb = [], []

    ## iterate all subjects
    print 'xt: extract', hm, at['tb'][sn]['nm'], 'from ', src, ':'

    for fn in gg(pt.join(src, '*.npz')):
        sb.append(pt.basename(fn).split('.')[0])
        print sb[-1]
        sys.stdout.flush()
        wm = np.load(fn)

        ## extract surfaces for subject {sb}
        ## hm: hemisphere, vi: vertex indices
        vt.append(wm[hm][vi])
        wm.close()

    ## write the samples to file in numpy format.
    print 'xt: write surface to ', dst
    sys.stdout.flush()

    vt=np.vstack(vt)
    sb=np.array(sb)
    np.savez_compressed(
        fo, sb=sb, vt=vt, vi=vi, hm=hm, sn=sn, id=id, nm=nm)
    
    print 'xt: success'
    sys.stdout.flush()
Ejemplo n.º 7
0
def main(textfolder):
    """Read, clean and compute statistics for every file matching the pattern.

    textfolder is used as a glob pattern. The statistics returned by
    get_textstats are not kept or returned.
    """
    for textfile in gg(textfolder):
        cleaned = clean_text(read_textfile(textfile))
        get_textstats(cleaned)
Ejemplo n.º 8
0
def itr_fn(src = "", fmt = 'n', flt = None, drop = True):

    """
    filename iterator

    For every path matched under {src} that passes {flt}, yield the
    attributes requested by {fmt}, one per format code and in that order.

    src: directory (its direct entries are listed) or glob pattern; it is
    passed through resolve_path() first.
    flt: optional predicate on the matched path; paths for which it
    returns false are skipped. None keeps every path.
    drop: drop list structure if only one file attribute
    is returned.
    format code:
    i: running index of the yielded file (skipped files are not counted)
    n: file name, N: absolute file name
    c: core name, C: absolute core name
       (core name: base name cut at its first '.')
    b: base name, B: base name taken from the absolute path
    d: directory, D: absolute directory
    e: extension(s) after the first '.' of the base name: a string when
       there is exactly one, a list when there are several, else None
    E: last extension only, or None
    any other code is ignored.
    """
    src = resolve_path(src)
    if pt.isdir(src):
        src = pt.join(src, "*")

    if flt is None:
        flt = lambda w: True

    i = 0
    for fn in gg(src):
        if not flt(fn):
            continue
        rt = []
        for c in fmt:
            if c == 'i':                  # running index
                r = i
            elif c == 'n':                # filename as matched
                r = fn
            elif c == 'N':                # absolute filename
                r = pt.abspath(fn)
            elif c == 'C':                # absolute corename
                # Cut only the base name at its first '.', so a dot in a
                # directory name cannot truncate the path.
                a = pt.abspath(fn)
                r = pt.join(pt.dirname(a), pt.basename(a).split('.')[0])
            elif c == 'c':                # relative corename
                r = pt.basename(fn).split('.')[0]
            elif c == 'B':                # basename of the absolute path
                r = pt.basename(pt.abspath(fn))
            elif c == 'b':                # basename
                r = pt.basename(fn)
            elif c == 'D':                # absolute directory
                r = pt.dirname(pt.abspath(fn))
            elif c == 'd':                # relative directory
                r = pt.dirname(fn)
            elif c == 'e':                # extension(s)
                r = pt.basename(fn).split('.')[1:]
                if len(r) == 1:
                    r = r[0]
                # no extension, or a single empty one (trailing '.')
                if not r:
                    r = None
            elif c == 'E':                # last extension
                r = pt.basename(fn).split('.')[1:]
                if r:
                    r = r[-1]
                # no extension, or an empty last one (trailing '.')
                if not r:
                    r = None
            else:
                continue
            rt.append(r)
        i += 1
        if drop and len(rt) == 1:
            yield rt[0]
        else:
            yield rt
Ejemplo n.º 9
0
def num_pk(src):
    """Return how many paths match the glob pattern {src}."""
    matches = gg(src)
    return len(matches)
Ejemplo n.º 10
0
 def glob(pathname, recursive=False):
     """Return the list of paths matching pathname.

     NOTE(review): recursive is accepted but never forwarded, so '**' is
     not expanded recursively here. This may be a deliberate shim for a
     glob without that keyword -- confirm before changing.
     """
     matches = gg(pathname)
     return matches
    plt.clf()


if __name__ == "__main__":

    # Eleven positional arguments, read in order. sys.argv entries are
    # already strings; counts and thresholds are parsed as integers and
    # the two flags are given as integers (0 is false, non-zero is true).
    niftipath = sys.argv[1]
    mnipath = sys.argv[2]
    ortho = sys.argv[3]
    nRows = int(sys.argv[4])
    nCuts = int(sys.argv[5])
    showLRannot = bool(int(sys.argv[6]))
    figLayout = sys.argv[7]
    threshpos = int(sys.argv[8])
    threshneg = int(sys.argv[9])
    findOptimalCut = bool(int(sys.argv[10]))
    imageType = sys.argv[11]

    # The literal path 'data' means: every file matching data/*.nii*;
    # anything else is taken as the single image to plot.
    fileList = gg('data/*.nii*') if niftipath == 'data' else [niftipath]

    # One plot per image and per character of the ortho string
    for fpath in fileList:
        for o in ortho:
            plotGlassbrainSlices(
                fpath, mnipath, o, nRows, nCuts, threshpos, threshneg,
                figLayout, showLRannot, findOptimalCut, imageType)
Ejemplo n.º 12
0
def firstGlob(*args, ext=None):
    """Return the first path matching the pattern built by getPath, or None.

    The search is recursive only when one of args is exactly "**".
    "First" is the first entry of the list glob returns; it is not sorted.
    """
    pattern = getPath(*args, ext=ext)
    if "**" in args:
        matches = gg(pattern, recursive=True)
    else:
        matches = gg(pattern)
    return matches[0] if matches else None
Ejemplo n.º 13
0
def recursiveGlob(*args, ext=None):
    """Return the sorted recursive matches of the getPath pattern, or None.

    None is returned, rather than an empty list, when nothing matches.
    """
    matches = sorted(gg(getPath(*args, ext=ext), recursive=True))
    if not matches:
        return None
    return matches
Ejemplo n.º 14
0
def glob(*args, ext=None):
    """Return the sorted matches of the getPath pattern, or None.

    None is returned, rather than an empty list, when nothing matches.
    """
    pattern = getPath(*args, ext=ext)
    matches = sorted(gg(pattern))
    return matches or None
# a = gg(pathname='Test/*.json')
#
# print(a)
#
# df = pd.DataFrame()
#
# print(df)
#
# for f in a:
#     tmp = pd.read_json(f, orient='index')
#     df = pd.concat([df, tmp], axis=0, ignore_index=True)
#
# print(df)

# with open('Test/'+'test1.json') as g:
#     read_data = pd.read_json(g, orient='index')
#     print(read_data)

# Use glob to make a list of json files in the TikTok_Test directory
import json

b = gg(pathname='TikTok_Test/*.json')

# Show which file gets parsed: the first match, in the order glob returned
# it. Raises IndexError when the directory holds no .json file.
print(b[0])

# Parse that file and show the decoded content
with open(b[0]) as h:
    data = json.load(h)

print(data)
Ejemplo n.º 16
0
def main(src, dst):
    """
    run work_3 on every '*.pgz' file found directly under {src}.

    src: directory searched for '*.pgz' inputs
    dst: directory joined with each input's base name to form the
    output path handed to work_3
    """
    for fi in gg(pt.join(src, '*.pgz')):
        work_3(fi, pt.join(dst, pt.basename(fi)))
Ejemplo n.º 17
0
    print(text[0:50])
    print(text.lower()[0:50])
    print(type(text))
    print(len(text))

# =====================

import os
from os.path import join
from os.path import join as oj

textdatei = oj("data", "Kraus.txt")

# Read the whole file as UTF-8; the previews below only need the string
with open(textdatei, encoding="utf8") as infile:
    text = infile.read()

# First 50 characters, the same preview lower-cased, then type and length
print(text[:50])
print(text.lower()[:50])
print(type(text))
print(len(text))

# ===============================

import glob
from glob import glob as gg

# Print the first 50 characters of every .txt file in the data directory
for file in gg(join("data", "*.txt")):
    with open(file, encoding="utf8") as infile:
        text = infile.read()
    print(text[:50])
Ejemplo n.º 18
0
    def __init__(self,
                 type,
                 envi_hdr="",
                 envi_file="",
                 ascii_spectra="",
                 meta_csv="",
                 ascii_bands="",
                 directory_path="",
                 meta_tab=""):
        """
        loads a spectral library from common spectral library formats including crism (envi format), asu spectral library
        (ascii spectra and csv meta), kim (ascii spectra and tab-separated meta), and USGS (file pattern and spectral
        bands ascii path)

        stores important spectral library features (spectra, spectral bands, name of spectra, and one hot labels) in a standard format

        Note: it is assumed that the first word of the "name" is the label of the mineral, which is generally true for all
        libraries with some exceptions. For these exceptions, the data should be relabelled.

        :param type: "asu", "kim", "crism", or "usgs" (compared case-sensitively; any other value loads nothing)
        :param envi_hdr: path to envi header file (only for crism)
        :param envi_file: path to envi file (only for crism)
        :param ascii_spectra: path to ascii spectra file (asu and kim); its first column holds the bands
        :param meta_csv: path to meta csv file (asu); the columns sample_name and category are read
        :param ascii_bands: path to spectral bands ascii file (usgs); its first row is skipped
        :param directory_path: glob pattern matching the spectra files (usgs); the first row of each file is a
            header whose third whitespace-separated token is used as the name
        :param meta_tab: path to tab-separated meta file (kim)

        Sets source, spectra, bands, names and category per library type, then text_labels, index, src_index,
        onehot_labels and onehot_category for all types.
        """

        #assign object variables per asu spec lib type
        if type == "asu":
            self.source = "asu"
            self.spectra = np.loadtxt(ascii_spectra)
            self.spectra = np.delete(
                self.spectra, 0,
                1)  # delete first spectra column (wavenumbers)
            self.bands = np.loadtxt(ascii_spectra, usecols=0)
            self.meta = pd.read_csv(meta_csv)
            self.names = self.meta.sample_name.tolist()
            self.category = self.meta.category.tolist()

        if type == "kim":
            self.source = "kim"
            self.spectra = np.loadtxt(ascii_spectra)
            self.spectra = np.delete(
                self.spectra, 0,
                1)  # delete first spectra column (wavenumbers)
            self.bands = np.loadtxt(ascii_spectra, usecols=0)
            # NOTE(review): csv.reader yields one list per row, so names is
            # a list of lists here, while the label extraction at the end
            # of this method calls .partition() on each entry, which lists
            # do not have. Confirm the meta_tab layout and which column
            # holds the name.
            with open(meta_tab) as f:
                self.names = list(csv.reader(f, delimiter='\t'))
            self.category = self.names

        # assign object variables per crism spec lib type
        if type == "crism":
            self.source = "crism"
            self.envi_file = envi.open(envi_hdr, envi_file)
            self.spectra = self.envi_file.spectra.transpose()
            self.bands = self.envi_file.bands.centers
            self.names = self.envi_file.names
            self.category = self.names

        if type == "usgs":
            self.source = "usgs"
            self.bands = np.loadtxt(ascii_bands, skiprows=1)
            # iterate through all files matched by directory_path, gathering
            # one spectrum column and one name per file
            columns = []
            names = []
            for f in gg(directory_path):
                temp_spectra = np.loadtxt(f, skiprows=1)
                columns.append(temp_spectra.reshape((len(temp_spectra), 1)))
                # only the header row is needed; the with-block closes the
                # handle instead of leaving one open per file
                with open(f, "r") as header:
                    temp_meta = header.readline().split()
                names.append(temp_meta[2])

            # join all columns in one step rather than re-copying the
            # growing matrix for every file; as before, nothing is assigned
            # when no file matched
            if columns:
                self.spectra = np.concatenate(columns, axis=1)
                self.names = names
            self.category = self.names

        #assign general object variables
        # label: first space-delimited word of each name
        self.text_labels = [names.partition(" ")[0] for names in self.names]
        self.index = range(len(self.names))
        self.src_index = range(len(self.names))
        encoder = LabelBinarizer()
        self.onehot_labels = encoder.fit_transform(self.text_labels)
        self.onehot_category = encoder.fit_transform(self.category)