Esempio n. 1
0
    def shuffle(list_of_files):
        """Shuffle several line-aligned files using one shared permutation.

        Line *k* of every input file is joined into a single record, the
        records are shuffled by the external ``shuf`` utility, and every
        record is split back into one newly created output file per input.
        Lines are whitespace-stripped on the way through. Iteration is driven
        by the first file; a companion file that runs out of lines contributes
        empty fields.

        Args:
            list_of_files: Paths of the text files to shuffle together.

        Returns:
            Paths of the shuffled copies (created under ``/tmp``), in the same
            order as ``list_of_files``. An empty input yields an empty list.

        Raises:
            OSError: If a file cannot be opened or ``shuf`` cannot be started.
            subprocess.CalledProcessError: If ``shuf`` exits with an error.
        """
        # Function-scope imports keep this block self-contained.
        import subprocess
        from contextlib import ExitStack

        if not list_of_files:
            return []

        separator = "<CONCATE4SHUF>"
        tf_os, tpath = mkstemp()
        shuffled_path = tpath + '.shuf'
        try:
            with ExitStack() as stack:
                # Wrap the descriptor mkstemp already opened rather than
                # opening the path a second time, so it is not leaked.
                tf = stack.enter_context(os.fdopen(tf_os, 'w'))
                fds = [stack.enter_context(open(ff)) for ff in list_of_files]
                for l in fds[0]:
                    lines = [l.strip()]
                    lines.extend(ff.readline().strip() for ff in fds[1:])
                    print(separator.join(lines), file=tf)

            # Argument list plus explicit stdout: the paths are never parsed
            # by a shell, and a failing shuf raises instead of silently
            # producing empty output.
            with open(shuffled_path, 'w') as shuffled:
                subprocess.check_call(['shuf', tpath], stdout=shuffled)

            fnames = [
                '/tmp/{}.{}.{}.shuf'.format(i, os.getpid(), time.time())
                for i, ff in enumerate(list_of_files)
            ]
            with ExitStack() as stack:
                outputs = [stack.enter_context(open(fn, 'w')) for fn in fnames]
                merged = stack.enter_context(open(shuffled_path))
                for l in merged:
                    s = l.strip().split(separator)
                    for i, fd in enumerate(outputs):
                        print(s[i], file=fd)
        finally:
            # Remove the intermediate files even when a step above failed.
            for leftover in (tpath, shuffled_path):
                if os.path.exists(leftover):
                    os.remove(leftover)

        return fnames
Esempio n. 2
0
    def shuffle(list_of_files):
        """Shuffle several line-aligned files using one shared permutation.

        Line *k* of every input file is joined into a single record, the
        records are shuffled by the external ``shuf`` utility, and every
        record is split back into one output file per input. Lines are
        whitespace-stripped on the way through. Iteration is driven by the
        first file; a companion file that runs out of lines contributes empty
        fields.

        Args:
            list_of_files: Paths of the text files to shuffle together.

        Returns:
            Paths of the shuffled copies, in input order. Each one is the
            input path with ``.<pid>.shuf`` appended. An empty input yields an
            empty list.

        Raises:
            OSError: If a file cannot be opened or ``shuf`` cannot be started.
            subprocess.CalledProcessError: If ``shuf`` exits with an error.
        """
        # Function-scope imports keep this block self-contained.
        import subprocess
        from contextlib import ExitStack

        if not list_of_files:
            return []

        separator = "|||||"
        tf_os, tpath = mkstemp()
        shuffled_path = tpath + '.shuf'
        # Computed once so the files written and the names returned agree.
        fnames = [ff + '.{}.shuf'.format(os.getpid()) for ff in list_of_files]
        try:
            with ExitStack() as stack:
                # Wrap the descriptor mkstemp already opened rather than
                # opening the path a second time, so it is not leaked.
                tf = stack.enter_context(os.fdopen(tf_os, 'w'))
                fds = [stack.enter_context(open(ff)) for ff in list_of_files]
                for l in fds[0]:
                    lines = [l.strip()]
                    lines.extend(ff.readline().strip() for ff in fds[1:])
                    print(separator.join(lines), file=tf)

            # Argument list plus explicit stdout: the paths are never parsed
            # by a shell, and a failing shuf raises instead of silently
            # producing empty output.
            with open(shuffled_path, 'w') as shuffled:
                subprocess.check_call(['shuf', tpath], stdout=shuffled)

            with ExitStack() as stack:
                outputs = [stack.enter_context(open(fn, 'w')) for fn in fnames]
                merged = stack.enter_context(open(shuffled_path))
                for l in merged:
                    s = l.strip().split(separator)
                    for i, fd in enumerate(outputs):
                        print(s[i], file=fd)
        finally:
            # Remove the intermediate files even when a step above failed.
            for leftover in (tpath, shuffled_path):
                if os.path.exists(leftover):
                    os.remove(leftover)

        return fnames
Esempio n. 3
0
def untar_data(path):
    """Extract every ``.zip``, ``.gz`` and ``.tar`` archive found in ``path``.

    Each archive directly inside ``path`` is reported on stdout (size, name
    and kind) and unpacked into ``path`` itself. ``.gz`` files are opened
    with ``tarfile``, so they are expected to be gzip-compressed tarballs.
    Afterwards the names of all entries in ``path`` are printed.

    Args:
        path: Directory to scan; a trailing separator is not required.

    Raises:
        OSError: If ``path`` cannot be scanned or an archive cannot be read.
        zipfile.BadZipFile: If a ``.zip`` entry is not a valid zip archive.
        tarfile.ReadError: If a ``.gz``/``.tar`` entry is not a tar archive.
    """
    # Snapshot the listing first: extraction adds entries to ``path``, and
    # scanning a directory while it is being modified is unspecified.
    with os.scandir(path) as listing:
        entries = list(listing)

    # NOTE(review): extractall() trusts the member paths stored inside the
    # archive; only run this on archives from a trusted source.
    for file in entries:
        name = file.name
        if name.endswith('.zip'):
            print(file.stat().st_size, name, "this is zip file")
            # ``file.path`` already includes ``path``, so the archive is
            # found regardless of the current working directory.
            with zipfile.ZipFile(file.path, 'r') as z:
                z.extractall(path)
        elif name.endswith('.gz') or name.endswith('.tar'):
            # TODO: a special condition still needs to be added for .bin files.
            kind = 'gz' if name.endswith('.gz') else 'tar'
            print(file.stat().st_size, name, "this is {} file".format(kind))
            # Mode "r" lets tarfile detect the compression transparently.
            with tarfile.open(file.path, "r") as tf:
                tf.extractall(path)

    for file in os.scandir(path):
        print(file.name)
Esempio n. 4
0
def shuffle(file):
    """Return a temporary file holding the lines of ``file`` in random order.

    The lines are read into memory, shuffled with ``random.shuffle`` and
    written to an anonymous temporary file created in the same directory as
    ``file``. Every line in the result ends with a newline, including a final
    input line that had none.

    Args:
        file: Path of the text file to shuffle.

    Returns:
        The open temporary file, in text mode and rewound to offset 0. It is
        removed automatically once it is closed.
    """
    with open(file, "r") as source:
        lines = [l.strip("\n") for l in source]
    random.shuffle(lines)

    path, filename = os.path.split(os.path.realpath(file))
    # Text mode ('w+') so str lines can be written on Python 3; the default
    # binary mode only accepts bytes there.
    fd = tempfile.TemporaryFile(mode='w+', prefix=filename + '.shuf', dir=path)
    fd.writelines(l + "\n" for l in lines)

    # Rewind so the caller can read the shuffled lines straight away.
    fd.seek(0)
    return fd
def promptuser(lines):
    """Show ``lines`` to the user in their editor and return what they kept.

    The lines are written to a temporary file, which is opened with the
    program named by the ``EDITOR`` environment variable (``vi`` when it is
    unset or empty). Once the editor exits, the file is read back and every
    line is stripped; empty lines and lines beginning with ``#`` are dropped.

    Args:
        lines: Iterable of strings to present, without trailing newlines.

    Returns:
        A list of the remaining stripped lines, in file order.
    """
    # Text mode so str lines can be written on Python 3 as well; the default
    # binary mode only accepts bytes there.
    tf = tempfile.NamedTemporaryFile(mode='w', delete=False)
    try:
        with tf:
            for line in lines:
                tf.write(line + '\n')

        editor = os.getenv('EDITOR') or 'vi'
        # Blocks until the editor exits; its exit status is ignored.
        subprocess.call([editor, tf.name])

        with open(tf.name) as f:
            res = [raw.strip() for raw in f]
    finally:
        # delete=False above means the file has to be removed by hand.
        os.remove(tf.name)

    # Build a real list so callers get the same result on Python 2 and 3.
    return [x for x in res if x and not x.startswith('#')]
Esempio n. 6
0
# scikit-image package
import skimage
import numpy as np
import random

from sklearn.model_selection import train_test_split

# NOTE(review): ``tf`` is presumably the TensorFlow module imported earlier
# in the file; confirm, since that import is not visible here.
tf.enable_eager_execution()
tf.set_random_seed(0)
np.random.seed(0)

# unzip tar file
import tarfile
# The archive gets its own name: binding it to ``tf`` would replace the
# object configured above with a closed TarFile for the rest of the script.
# The context manager also closes the archive if extraction fails.
with tarfile.open("drive/My Drive/Colab Notebooks/notMNIST_large.tar") as archive:
    archive.extractall()

train_dir = "notMNIST_large/"

imgs = []
labels = []

imageSize = 28


# load data and labels
def get_data(folder):
    imgs = []
    labels = []
    for folderName in os.listdir(folder):
        if not folderName.startswith('.'):