Example #1
File: extras.py  Project: gridl/accelerator
def pickle_save(variable, filename='result', sliceno=None, temp=None):
	filename = full_filename(filename, '.pickle', sliceno)
	# skip writing DEBUG temp files unless --debug was passed on the command line
	if temp == Temp.DEBUG and temp is not True and '--debug' not in argv:
		return
	with FileWriteMove(filename, temp) as fh:
		# use protocol version 2 so python2 can read the pickles too.
		pickle.dump(variable, fh, 2)
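
For reference, a minimal read-back counterpart; the name pickle_load and its signature are illustrative rather than the project's own API, and full_filename is assumed to resolve paths exactly as above.

def pickle_load(filename='result', sliceno=None):
	# Resolve the same '<filename>.pickle' path that pickle_save wrote to.
	filename = full_filename(filename, '.pickle', sliceno)
	with open(filename, 'rb') as fh:
		# protocol 2 pickles load fine on both python2 and python3.
		return pickle.load(fh)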
Example #2
def _download_norb_small(dataset):
    """
    Download the Norb dataset
    """
    print('Downloading small resized norb data')

    urllib.urlretrieve('http://dl.dropbox.com/u/13294233/smallnorb/smallnorb-'
                       '5x46789x9x18x6x2x32x32-training-dat-matlab-bicubic.mat',
                       dataset + '/smallnorb_train_x.mat')
    urllib.urlretrieve('http://dl.dropbox.com/u/13294233/smallnorb/smallnorb-'
                       '5x46789x9x18x6x2x96x96-training-cat-matlab.mat',
                       dataset + '/smallnorb_train_t.mat')

    urllib.urlretrieve('http://dl.dropbox.com/u/13294233/smallnorb/smallnorb-'
                       '5x01235x9x18x6x2x32x32-testing-dat-matlab-bicubic.mat',
                       dataset + '/smallnorb_test_x.mat')
    urllib.urlretrieve('http://dl.dropbox.com/u/13294233/smallnorb/smallnorb-'
                       '5x01235x9x18x6x2x96x96-testing-cat-matlab.mat',
                       dataset + '/smallnorb_test_t.mat')

    data = loadmat(dataset + '/smallnorb_train_x.mat')['traindata']
    train_x = np.concatenate([data[:,0,:].T, data[:,1,:].T]).astype('float32')  # stack both stereo channels
    data = loadmat(dataset + '/smallnorb_train_t.mat')
    train_t = data['trainlabels'].flatten().astype('float32')
    train_t = np.concatenate([train_t, train_t])

    data = loadmat(dataset + '/smallnorb_test_x.mat')['testdata']
    test_x = np.concatenate([data[:,0,:].T, data[:,1,:].T]).astype('float32')  # stack both stereo channels
    data = loadmat(dataset + '/smallnorb_test_t.mat')
    test_t = data['testlabels'].flatten().astype('float32')
    test_t = np.concatenate([test_t, test_t])
    with open(dataset + '/norbsmall32x32.cpkl', 'wb') as f:
        cPkl.dump([train_x, train_t, test_x, test_t], f,
                  protocol=cPkl.HIGHEST_PROTOCOL)
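
A matching loader for the cached file is straightforward; this sketch assumes the same cPkl alias (cPickle) that the snippet above relies on.

def _load_norb_small(dataset):
    # Read back the arrays pickled by _download_norb_small.
    with open(dataset + '/norbsmall32x32.cpkl', 'rb') as f:
        train_x, train_t, test_x, test_t = cPkl.load(f)
    return (train_x, train_t), (test_x, test_t)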
Example #3
def save_state():
    """Save the program state, for debugging purposes."""

    # relax data store singleton import.  Must be done here!
    try:
        from data_store import Relax_data_store; ds = Relax_data_store()

    # OK, this is not relax, so don't do anything!
    except ImportError:
        return

    # Append the date and time to the save file.
    now = time.localtime()
    file_name = "relax_state_%i%02i%02i_%02i%02i%02i" % (now[0], now[1], now[2], now[3], now[4], now[5])

    # Open the file for writing.
    if bz2:
        sys.stderr.write("\n\nStoring the relax state in the file '%s.bz2'.\n\n\n" % file_name)
        file = BZ2File(file_name+'.bz2', 'w')
    else:
        sys.stderr.write("\n\nStoring the relax state in the file '%s'.\n\n\n" % file_name)
        file = open(file_name, 'wb')

    # Pickle the data store and write it to the file.
    pickle.dump(ds, file, 1)

    # Close the file.
    file.close()
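
A hypothetical counterpart for restoring such a state file; load_state is illustrative and not part of relax itself.

def load_state(file_name):
    # Handle both the bzip2-compressed and plain files save_state() writes.
    if file_name.endswith('.bz2'):
        fh = BZ2File(file_name, 'r')
    else:
        fh = open(file_name, 'rb')
    try:
        return pickle.load(fh)
    finally:
        fh.close()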
Example #4
def _download_omniglot(dataset):
    """
    Download the omniglot dataset if it is not present.
    :return: The train, test and validation set.
    """
    from scipy.misc import imread,imresize
    origin_eval = (
        "https://github.com/brendenlake/omniglot/"
        "raw/master/python/images_evaluation.zip"
    )
    origin_back = (
        "https://github.com/brendenlake/omniglot/"
        "raw/master/python/images_background.zip"
    )
    print('Downloading data from %s' % origin_eval)
    urllib.urlretrieve(origin_eval, dataset + '/images_evaluation.zip')
    print('Downloading data from %s' % origin_back)
    urllib.urlretrieve(origin_back, dataset + '/images_background.zip')

    with zipfile.ZipFile(dataset + '/images_evaluation.zip', "r") as z:
        z.extractall(dataset)
    with zipfile.ZipFile(dataset + '/images_background.zip', "r") as z:
        z.extractall(dataset)

    background = dataset + '/images_background'
    evaluation = dataset + '/images_evaluation'
    matches = []
    for root, dirnames, filenames in os.walk(background):
        for filename in fnmatch.filter(filenames, '*.png'):
            matches.append(os.path.join(root, filename))
    for root, dirnames, filenames in os.walk(evaluation):
        for filename in fnmatch.filter(filenames, '*.png'):
            matches.append(os.path.join(root, filename))

    train = []
    test = []

    def _load_image(fn):
        image = imread(fn, True)
        image = imresize(image, (32, 32), interp='bicubic')
        image = image.reshape((-1))
        image = np.abs(image-255.)/255.
        return image

    for p in matches:
        if any(x in p for x in ['16.png','17.png','18.png','19.png','20.png']):
            test.append(_load_image(p))
        else:
            train.append(_load_image(p))

    shutil.rmtree(background+'/')
    shutil.rmtree(evaluation+'/')

    test = np.asarray(test)
    train = np.asarray(train)
    with open(dataset + '/omniglot.cpkl', 'wb') as f:
        cPkl.dump([train, test], f, protocol=cPkl.HIGHEST_PROTOCOL)
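
A typical call site would download only when the cached pickle is missing; the directory below is illustrative.

dataset_dir = './omniglot'  # illustrative cache directory
if not os.path.isfile(dataset_dir + '/omniglot.cpkl'):
    _download_omniglot(dataset_dir)
with open(dataset_dir + '/omniglot.cpkl', 'rb') as f:
    train, test = cPkl.load(f)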
Example #5
def _download_lwf(dataset, size):
    """
    Fetch the LFW (Labeled Faces in the Wild) people dataset via
    scikit-learn and cache it at `dataset` as a gzipped pickle.
    """
    from sklearn.datasets import fetch_lfw_people
    lfw_people = fetch_lfw_people(color=True, resize=size)
    with gzip.open(dataset, 'wb') as f:
        cPkl.dump([lfw_people.images.astype('uint8'), lfw_people.target], f,
                  protocol=cPkl.HIGHEST_PROTOCOL)
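
Read-back is symmetric, again assuming cPkl aliases cPickle:

with gzip.open(dataset, 'rb') as f:
    images, target = cPkl.load(f)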
Example #6
File: cache.py  Project: wandec/sinaapp
    def store(self, key, value):
        path = self._get_path(key)
        self.lock.acquire()
        try:
            # acquire lock and open file
            f_lock = self._lock_file(path)
            datafile = open(path, 'wb')

            # write data
            pickle.dump((time.time(), value), datafile)

            # close and unlock file
            datafile.close()
            self._unlock_file(f_lock)
        finally:
            self.lock.release()
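
A matching get() would reverse store(); this sketch is illustrative, assumes _lock_file/_unlock_file behave as above, and is not necessarily the project's actual implementation.

    def get(self, key, timeout=None):
        # Illustrative counterpart to store(): load the (timestamp, value)
        # tuple and treat entries older than `timeout` seconds as misses.
        path = self._get_path(key)
        self.lock.acquire()
        try:
            if not os.path.exists(path):
                return None
            f_lock = self._lock_file(path)
            try:
                with open(path, 'rb') as datafile:
                    created, value = pickle.load(datafile)
            finally:
                self._unlock_file(f_lock)
        finally:
            self.lock.release()
        if timeout is not None and time.time() - created > timeout:
            return None
        return value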
Example #7
def _download_svhn(dataset, extra):
    """
    Download the SVHN dataset
    """
    print('Downloading data from http://ufldl.stanford.edu/housenumbers/, '
          'this may take a while...')
    if extra:
        print("Downloading extra data...")
        urllib.urlretrieve('http://ufldl.stanford.edu/housenumbers/extra_32x32.mat',
                           dataset+'extra_32x32.mat')
        extra = loadmat(dataset+'extra_32x32.mat')
        extra_x = extra['X'].swapaxes(2,3).swapaxes(1,2).swapaxes(0,1)
        extra_y = extra['y'].reshape((-1)) - 1

        print("Saving extra data")
        with open(dataset + 'svhn_extra.cpkl', 'wb') as f:
            cPkl.dump([extra_x, extra_y], f, protocol=cPkl.HIGHEST_PROTOCOL)
        os.remove(dataset+'extra_32x32.mat')

    else:
        print("Downloading train data...")
        urllib.urlretrieve('http://ufldl.stanford.edu/housenumbers/train_32x32.mat',
                           dataset+'train_32x32.mat')
        print("Downloading test data...")
        urllib.urlretrieve('http://ufldl.stanford.edu/housenumbers/test_32x32.mat',
                           dataset+'test_32x32.mat')

        train = loadmat(dataset+'train_32x32.mat')
        train_x = train['X'].swapaxes(2,3).swapaxes(1,2).swapaxes(0,1)
        train_y = train['y'].reshape((-1)) - 1
        test = loadmat(dataset+'test_32x32.mat')
        test_x = test['X'].swapaxes(2,3).swapaxes(1,2).swapaxes(0,1)
        test_y = test['y'].reshape((-1)) - 1

        print("Saving train data")
        with open(dataset + 'svhn_train.cpkl', 'wb') as f:
            cPkl.dump([train_x, train_y], f, protocol=cPkl.HIGHEST_PROTOCOL)
        print("Saving test data")
        with open(dataset + 'svhn_test.cpkl', 'wb') as f:
            cPkl.dump([test_x, test_y], f, protocol=cPkl.HIGHEST_PROTOCOL)
        os.remove(dataset+'train_32x32.mat')
        os.remove(dataset+'test_32x32.mat')
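
Loading the cached SVHN pickles back follows the same pattern:

with open(dataset + 'svhn_train.cpkl', 'rb') as f:
    train_x, train_y = cPkl.load(f)
with open(dataset + 'svhn_test.cpkl', 'rb') as f:
    test_x, test_y = cPkl.load(f)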

Example #8

def _build_char_matrices(pos_lst, neg_lst, char2idx, unk_idx, max_len,
                         vocab_size, vocab, minimum_len=None,
                         maximum_len=None, seed=1234):
    # NOTE: this enclosing signature is a reconstruction; the original
    # fragment arrived without its function header, so the parameters are
    # simply the free names the body relies on.
    # helper function for converting chars to matrix format
    def create_matrix(reviews, y_cls):
        num_seqs = len(reviews)
        X = np.zeros((num_seqs, max_len), dtype='int32') - 1  # set all to -1
        for row in range(num_seqs):
            review = reviews[row]
            for col in range(len(review)):
                # try to look up key otherwise use unk_idx
                if review[col] in char2idx:
                    char_idx = char2idx[review[col]]
                else:
                    char_idx = unk_idx
                X[row, col] = char_idx

        mask = (X != -1).astype('float32')
        X[X==-1] = 0
        y = np.ones(num_seqs, dtype='int32')*y_cls
        return X, y, mask

    X_pos, y_pos, mask_pos = create_matrix(pos_lst, 1)
    X_neg, y_neg, mask_neg = create_matrix(neg_lst, 0)
    X = np.concatenate([X_pos, X_neg], axis=0)
    y = np.concatenate([y_pos, y_neg], axis=0)
    mask = np.concatenate([mask_pos, mask_neg])

    print("-"*40)
    print("Minium length filter :", minimum_len)
    print("Maximum length filter:", maximum_len)
    if minimum_len is not None:
        seq_lens = mask.sum(axis=1)
        keep = seq_lens >= minimum_len
        print("Seqs below minimum   : %i" % np.invert(keep).sum())
        X = X[keep, :]
        y = y[keep]
        mask = mask[keep, :]

    if maximum_len is not None:
        seq_lens = mask.sum(axis=1)
        keep = seq_lens <= maximum_len
        print("Seqs above maximum   : %i" % np.invert(keep).sum())
        X = X[keep, :]
        y = y[keep]
        mask = mask[keep, :]

    np.random.seed(seed)
    p = np.random.permutation(X.shape[0])
    X = X[p]
    y = y[p]
    mask = mask[p]

    seq_lens = mask.sum(axis=1).astype('int32')
    print("X                    :", X.shape, X.dtype)
    print("y                    :", y.shape, y.dtype)
    print("mask                 :", mask.shape, mask.dtype)
    print("MIN length           : ", seq_lens.min())
    print("MAX length           : ", seq_lens.max())
    print("MEAN length          : ", seq_lens.mean())
    print("UNKOWN chars         : ", np.sum(X==unk_idx))
    print("-"*40)

    # check that the indices in X cover the whole vocabulary plus unk_idx
    n = vocab_size if isinstance(vocab_size, int) else len(vocab_size)
    assert len(np.unique(X)) == n + 1
    assert sum(np.unique(y)) == 1  # check that y contains exactly 0 and 1
    return X, y, mask, vocab
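
An illustrative call, assuming pos_lst/neg_lst hold the raw review strings and char2idx maps characters to integer ids:

X, y, mask, vocab = _build_char_matrices(pos_lst, neg_lst, char2idx, unk_idx,
                                         max_len, vocab_size, vocab,
                                         minimum_len=5, maximum_len=400)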