Example #1
def store_and_or_load_data(dataset_info, outputdir):
    if dataset_info.endswith('.pkl'):
        save_path = dataset_info
    else:
        dataset = os.path.basename(dataset_info)
        data_dir = os.path.dirname(dataset_info)
        save_path = os.path.join(outputdir, dataset + '_Manager.pkl')

    if not os.path.exists(save_path):
        lock = lockfile.LockFile(save_path)
        while not lock.i_am_locking():
            try:
                lock.acquire(timeout=60)  # wait up to 60 seconds
            except lockfile.LockTimeout:
                lock.break_lock()
                lock.acquire()
        print('I locked', lock.path)
        # It is not yet certain whether the file already exists
        try:
            if not os.path.exists(save_path):
                D = CompetitionDataManager(dataset, data_dir,
                                           verbose=True,
                                           encode_labels=True)
                fh = open(save_path, 'wb')
                pickle.dump(D, fh, -1)
                fh.close()
            else:
                D = pickle.load(open(save_path, 'rb'))
        except Exception:
            raise
        finally:
            lock.release()
    else:
        D = pickle.load(open(save_path, 'rb'))
    return D
Example #2
    def load_model(modelfile):
        layers = {}
        with open(modelfile, "rb") as f:
            layers = cPickle.load(f)
        with open("params_" + modelfile, "rb") as f:
            for layer_key in layers.keys():
                layers[layer_key].params = cPickle.load(f)

        n_of_layers = len(layers.keys())

        flstm = AttendedLSTM(input_dim=layers[0].input_dim, output_dim=layers[0].outer_output_dim,
                                   number_of_layers=n_of_layers, hidden_dims=[layers[0].output_dim])
        flstm.build_loaded_model(layers)

        embedded_test, test_labels = WordEmbeddingLayer.load_embedded_data(path="../data/", name="test",
                                                                           representation="glove.840B.300d")


        binary_embedded_test = []
        binary_test_labels = []
        for i in np.arange(len(embedded_test)):
            if np.argmax(test_labels[i]) != 1:
                binary_embedded_test.append(embedded_test[i])
                binary_test_labels.append(np.eye(2)[np.argmax(test_labels[i]) // 2])

        flstm.test_dev(binary_embedded_test, binary_test_labels)
        return flstm
Example #3
File: cifar.py Project: h2oai/deepwater
def extract_images(f):
    """Extract CIFAR images and labels from a gzipped tar archive.
    Args:
      f: A file object that can be passed into a gzip reader.
    Returns:
      Four lists: train images, train labels, test images, test labels,
      where each image is a flat uint8 row of length 3072.
    """
    train_images = []
    train_labels = []
    test_images = []
    test_labels = []
    print('Extracting', f.name)
    with tarfile.TarFile(fileobj=gzip.GzipFile(fileobj=f)) as tfile:
        for name in tfile.getnames():
            if 'data_batch' in name:
                fd = tfile.extractfile(name)
                data = cPickle.load(fd)
                for image, label in zip(data['data'], data['labels']):
                    image = np.array(image)
                    train_images.append(image)
                    train_labels.append(label)
            elif 'test_batch' in name:
                fd = tfile.extractfile(name)
                data = cPickle.load(fd)
                for image, label in zip(data['data'], data['labels']):
                    image = np.array(image)
                    test_images.append(image)
                    test_labels.append(label)
            else:
                continue

    return train_images, train_labels, test_images, test_labels
Example #4
def predict_new():
    """
    An example of how to load a trained model and use it
    to predict labels.
    """

    # load the saved model
    classifier = pickle.load(open('../data/lr_best_model.pkl', 'rb'))

    # compile a predictor function
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.y_pred)

    # We can test it on some examples from the test set
    with gzip.open('../data/kaggle_test.pkl.gz', 'rb') as f:
        test_data = pickle.load(f)

    predicted_values = predict_model(test_data/255)

    result = numpy.vstack((numpy.arange(predicted_values.shape[0])+1,predicted_values))

    res = result.T

    import csv
    numpy.savetxt("../data/result_lr.csv",res,fmt=('%d','%d'),delimiter=',',header='ImageId,Label')
Example #5
def read_pickle_from_file(filename):
  with tf.gfile.Open(filename, 'rb') as f:
    if sys.version_info >= (3, 0):
      data_dict = pickle.load(f, encoding='bytes')
    else:
      data_dict = pickle.load(f)
  return data_dict
Example #6
File: sample.py Project: jtoy/word-rnn-tf
def sample(args):
    with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
        saved_args = cPickle.load(f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'rb') as f:
        chars, vocab = cPickle.load(f)
    model = Model(saved_args, True)
    val_loss_file = args.save_dir + '/val_loss.json'
    with tf.Session() as sess:
        saver = tf.train.Saver(tf.all_variables())
        if os.path.exists(val_loss_file):
            with open(val_loss_file, "r") as text_file:
                text = text_file.read()
                loss_json = json.loads(text)
                losses = sorted(loss_json.keys(), key=lambda x: float(x))
                loss = losses[0]
                model_checkpoint_path =  loss_json[loss]['checkpoint_path']
                #print(model_checkpoint_path)
                saver.restore(sess, model_checkpoint_path)
                result = model.sample(sess, chars, vocab, args.n, args.prime, args.sample_rule, args.temperature)
                print(result) #add this back in later, not sure why its not working
                output = "/data/output/"+ str(int(time.time())) + ".txt"
                with open(output, "w") as text_file:
                    text_file.write(result)
                print(output)
Example #7
    def _parse_file(cls, path, pickle=False):
        """parse a .chain file into a list of the type [(L{Chain}, arr, arr, arr) ...]

        :param fname: name of the file"""

        fname = path
        if fname.endswith(".gz"):
            fname = path[:-3]

        if fname.endswith('.pkl'):
            #you asked for the pickled file. I'll give it to you
            log.debug("loading pickled file %s ..." % fname)
            return cPickle.load(open(fname, 'rb'))
        elif os.path.isfile("%s.pkl" % fname):
            #there is a cached version I can give to you
            log.info("loading pickled file %s.pkl ..." % fname)
            if os.stat(path).st_mtime > os.stat("%s.pkl" % fname).st_mtime:
                log.critical("*** pickled file %s.pkl is not up to date ***" % (path))
            return cPickle.load(open("%s.pkl" % fname, 'rb'))

        data = fastLoadChain(path, cls._strfactory)
        if pickle and not os.path.isfile('%s.pkl' % fname):
            log.info("pickling to %s.pkl" % (fname))
            with open('%s.pkl' % fname, 'wb') as fd:
                cPickle.dump(data, fd)
        return data
Example #8
File: cifar.py Project: 5ke/keras
def load_batch(fpath, label_key='labels'):
    """Internal utility for parsing CIFAR data.

    # Arguments
        fpath: path the file to parse.
        label_key: key for label data in the retrieve
            dictionary.

    # Returns
        A tuple `(data, labels)`.
    """
    f = open(fpath, 'rb')
    if sys.version_info < (3,):
        d = cPickle.load(f)
    else:
        d = cPickle.load(f, encoding='bytes')
        # decode utf8
        d_decoded = {}
        for k, v in d.items():
            d_decoded[k.decode('utf8')] = v
        d = d_decoded
    f.close()
    data = d['data']
    labels = d[label_key]

    data = data.reshape(data.shape[0], 3, 32, 32)
    return data, labels
Example #9
    def creator(path):
        archive_path = download.cached_download(url)

        train_x = numpy.empty((5, 10000, 3072), dtype=numpy.uint8)
        train_y = numpy.empty((5, 10000), dtype=numpy.uint8)
        test_y = numpy.empty(10000, dtype=numpy.uint8)

        dir_name = '{}-batches-py'.format(name)

        with tarfile.open(archive_path, 'r:gz') as archive:
            # training set
            for i in range(5):
                file_name = '{}/data_batch_{}'.format(dir_name, i + 1)
                d = pickle.load(archive.extractfile(file_name))
                train_x[i] = d['data']
                train_y[i] = d['labels']

            # test set
            file_name = '{}/test_batch'.format(dir_name)
            d = pickle.load(archive.extractfile(file_name))
            test_x = d['data']
            test_y[...] = d['labels']  # copy to array

        train_x = train_x.reshape(50000, 3072)
        train_y = train_y.reshape(50000)

        numpy.savez_compressed(path, train_x=train_x, train_y=train_y,
                               test_x=test_x, test_y=test_y)
        return {'train_x': train_x, 'train_y': train_y,
                'test_x': test_x, 'test_y': test_y}
Example #10
def store_and_or_load_data(outputdir, dataset, data_dir):
    save_path = os.path.join(outputdir, dataset + '_Manager.pkl')
    if not os.path.exists(save_path):
        lock = lockfile.LockFile(save_path)
        while not lock.i_am_locking():
            try:
                lock.acquire(timeout=60)  # wait up to 60 seconds
            except lockfile.LockTimeout:
                lock.break_lock()
                lock.acquire()
        print('I locked', lock.path)
        # It is not yet certain whether the file already exists
        try:
            if not os.path.exists(save_path):
                D = SimpleDataManager(dataset, data_dir, verbose=True)
                fh = open(save_path, 'wb')
                pickle.dump(D, fh, -1)
                fh.close()
            else:
                D = pickle.load(open(save_path, 'rb'))
        except Exception:
            raise
        finally:
            lock.release()
    else:
        D = pickle.load(open(save_path, 'rb'))
        print('Loaded data')
    return D
Example #11
def load_pickle(f):
    version = platform.python_version_tuple()
    if version[0] == '2':
        return  pickle.load(f)
    elif version[0] == '3':
        return  pickle.load(f, encoding='latin1')
    raise ValueError("invalid python version: {}".format(version))
Example #12
def test_save_the_best():
    with NamedTemporaryFile() as dst,\
            NamedTemporaryFile() as dst_best:
        track_cost = TrackTheBest("cost")
        main_loop = MockMainLoop(
            extensions=[FinishAfter(after_n_epochs=1),
                        WriteCostExtension(),
                        track_cost,
                        Checkpoint(dst.name, after_batch=True,
                                   save_separately=['log'])
                        .add_condition(
                            "after_batch",
                            OnLogRecord(track_cost.notification_name),
                            (dst_best.name,))])
        main_loop.run()

        assert main_loop.log[4]['saved_to'] == (dst.name, dst_best.name)
        assert main_loop.log[5]['saved_to'] == (dst.name, dst_best.name)
        assert main_loop.log[6]['saved_to'] == (dst.name,)
        with open(dst_best.name, 'rb') as src:
            assert cPickle.load(src).log.status['iterations_done'] == 5
        root, ext = os.path.splitext(dst_best.name)
        log_path = root + "_log" + ext
        with open(log_path, 'rb') as src:
            assert cPickle.load(src).status['iterations_done'] == 5
Example #13
def test_read_backward_compatibility():
    """Test backwards compatibility with a pickled file that's created with Python 2.7.3,
    Numpy 1.7.1_ahl2 and Pandas 0.14.1
    """
    fname = path.join(path.dirname(__file__), "data", "test-data.pkl")

    # For newer versions; verify that unpickling fails when using cPickle
    if PANDAS_VERSION >= LooseVersion("0.16.1"):
        if sys.version_info[0] >= 3:
            with pytest.raises(UnicodeDecodeError), open(fname) as fh:
                cPickle.load(fh)
        else:
            with pytest.raises(TypeError), open(fname) as fh:
                cPickle.load(fh)

    # Verify that PickleStore() uses a backwards compatible unpickler.
    store = PickleStore()

    with open(fname) as fh:
        # PickleStore compresses data with lz4
        version = {'blob': compressHC(fh.read())}
    df = store.read(sentinel.arctic_lib, version, sentinel.symbol)

    expected = pd.DataFrame(range(4), pd.date_range(start="20150101", periods=4))
    assert (df == expected).all().all()
Example #14
File: _tmvaFactory.py Project: 0x0all/rep
def main():
    # Reading the configuration from stdin
    classifier = pickle.load(sys.stdin)
    info = pickle.load(sys.stdin)
    assert isinstance(classifier, tmva.TMVAClassifier) or isinstance(classifier, tmva.TMVARegressor)
    assert isinstance(info, tmva._AdditionalInformation)
    tmva_process(classifier, info)
Example #15
def _pickle_load(f):
    if sys.version_info > (3, ):
        # python3
        return pickle.load(f, encoding='latin-1')
    else:
        # python2
        return pickle.load(f)
Example #16
    def __init__(self, experiment_name):
        self.engine = experiment.Experiment.get_engine(
            experiment_name, "sqlite"
        )
        SQLAlchemySession.configure(bind=self.engine)
        self.session = SQLAlchemySession()

        self.hdf5_file = h5py.File(
            os.path.join(experiment_name, "phenotypes.hdf5"),
            "r"
        )

        self.config = os.path.join(experiment_name, "configuration.yaml")
        if not os.path.isfile(self.config):
            self.config = None

        # Experiment info.
        filename = os.path.join(experiment_name, "experiment_info.pkl")
        with open(filename, "rb") as f:
            self.info = pickle.load(f)

        # Task info.
        self.task_info = {}
        path = os.path.join(experiment_name, "tasks")
        for task_dir in os.listdir(path):
            info_path = os.path.join(path, task_dir, "task_info.pkl")
            if os.path.isfile(info_path):
                with open(info_path, "rb") as f:
                    self.task_info[task_dir] = pickle.load(f)

        # Correlation matrix.
        filename = os.path.join(experiment_name, "phen_correlation_matrix.npy")
        self.correlation_matrix = np.load(filename)
Example #17
 def __init__(self, path, random_seed, fold):
     np.random.seed(random_seed)
     self.path = path
     self.linkfile = path + 'allPostLinkMap.pickle'
     # self.edgelistfile = path + 'edgelist.txt'
     self.labelfile = path + 'allPostLabelMap.pickle'
     self.authorfile = path + 'allPostAuthorMap.pickle'
     self.authorattrifile = path + 'allAuthorAttrisProc.pickle'
     self.authorlinkfile = path + 'allAuthorLinks.pickle'
     self.textfile = path + 'allUserTextSkip.pickle2'
     self.foldfile = path + 'allFolds.pickle'
     self.threadfile = path + 'allThreadPost.pickle'
     self.embfile = path + 'node.emb'
     self.fold = fold
     self.nodes_infor = []
     self.node_map = {} 
     with open(self.textfile, 'rb') as fin:
         allTextEmbed = pickle.load(fin, encoding='latin1')
         self.allTextMap = pickle.load(fin, encoding='latin1')
         fin.close()
     self.node_count = len(self.allTextMap)
     for i in range(self.node_count):
         self.add_node(i)
     self.read_label()
     self.read_text()
     self.read_link()
     self.label_count = len(self.label_map)
     # print('label count:', self.label_count)
     self.construct_data()
Example #18
def load_data(dataset):

    print('... loading data')

    with gzip.open(dataset, 'rb') as f:
        try:
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        except:
            train_set, valid_set, test_set = pickle.load(f)

    def shared_dataset(data_xy, borrow=True):
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX), borrow=borrow)

        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX), borrow=borrow)
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]

    return rval
Example #19
 def load(cls, fn, compress=True, *args, **kwargs):
     if compress and not fn.strip().lower().endswith('.gz'):
         fn = fn + '.gz'
     assert os.path.isfile(fn), 'File %s does not exist.' % (fn,)
     if compress:
         return pickle.load(gzip.open(fn, 'rb'))
     return pickle.load(open(fn, 'rb'))
Example #20
def sample(args):
    with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
        saved_args = cPickle.load(f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'rb') as f:
        chars, vocab = cPickle.load(f)
    model = Model(saved_args, True)
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            ts = model.sample(sess, chars, vocab, args.n, args.prime, args.sample)
            print("Sampled Output\n")
            print(ts)
            print("Converting Text to Speech")
            tts = gTTS(text=ts, lang='en-uk')
            tts.save("ts.mp3")
            audio = MP3("ts.mp3")
            audio_length = audio.info.length
            print("Speaker is Getting Ready")
            mixer.init()
            mixer.music.load('ts.mp3')
            mixer.music.play()
            time.sleep(audio_length+5)
Example #21
def get_data():
  """Get data in form suitable for episodic training.

  Returns:
    Train and test data as dictionaries mapping
    label to list of examples.
  """
  with tf.gfile.GFile(DATA_FILE_FORMAT % 'train', 'rb') as f:
    processed_train_data = pickle.load(f)
  with tf.gfile.GFile(DATA_FILE_FORMAT % 'test', 'rb') as f:
    processed_test_data = pickle.load(f)

  train_data = {}
  test_data = {}

  for data, processed_data in zip([train_data, test_data],
                                  [processed_train_data, processed_test_data]):
    for image, label in zip(processed_data['images'],
                            processed_data['labels']):
      if label not in data:
        data[label] = []
      data[label].append(image.reshape([-1]).astype('float32'))

  intersection = set(train_data.keys()) & set(test_data.keys())
  assert not intersection, 'Train and test data intersect.'
  ok_num_examples = [len(ll) == 20 for _, ll in train_data.items()]
  assert all(ok_num_examples), 'Bad number of examples in train data.'
  ok_num_examples = [len(ll) == 20 for _, ll in test_data.items()]
  assert all(ok_num_examples), 'Bad number of examples in test data.'

  logging.info('Number of labels in train data: %d.', len(train_data))
  logging.info('Number of labels in test data: %d.', len(test_data))

  return train_data, test_data
Example #22
  def Init(self):
    TFunctionApprox.Init(self)
    L= self.Locate
    if self.Params['data_x'] != None:
      self.DataX= pickle.load(open(L(self.Params['data_x']), 'rb'))
    if self.Params['data_y'] != None:
      self.DataY= pickle.load(open(L(self.Params['data_y']), 'rb'))

    self.C= []
    self.Closests= []
    self.CDists= []  #Distance to the closest point

    if self.Params['C'] != None:
      self.C= copy.deepcopy(self.Params['C'])
    if self.Params['Closests'] != None:
      self.Closests= copy.deepcopy(self.Params['Closests'])
    if self.Params['CDists'] != None:
      self.CDists= copy.deepcopy(self.Params['CDists'])

    if self.Options['kernel']=='l2g':  #L2 norm Gaussian
      self.kernel= Gaussian
      self.dist= Dist
    elif self.Options['kernel']=='maxg':  #Max norm Gaussian
      self.kernel= GaussianM
      self.dist= DistM
    else:
      raise Exception('Undefined kernel type:',self.Options['kernel'])

    self.lazy_copy= True  #Assign True when DataX or DataY is updated.
    self.CheckPredictability()
Example #23
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(args.init_from)," %s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"config.pkl")),"config.pkl file does not exist in path %s"%args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"chars_vocab.pkl")),"chars_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt,"No checkpoint found"
        assert ckpt.model_checkpoint_path,"No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same=["model","rnn_size","num_layers","seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme]==vars(args)[checkme],"Command line argument and saved model disagree on '%s' "%checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars==data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab==data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                    or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
Example #24
def load_data(data_file):
    with gzip.open(data_file, 'rb') as f:
        try:
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        except:
            train_set, valid_set, test_set = pickle.load(f)

    return (train_set, valid_set, test_set)
Example #25
 def _get_object_data(self):
     obj = None
     mime_data = cb.mimeData()
     if mime_data.hasFormat(PYTHON_TYPE):
         serialized_data = BytesIO(mime_data.data(PYTHON_TYPE).data())
         klass = load(serialized_data)
         obj = load(serialized_data)
     return obj
Example #26
def run_LogisticRegression():
    # start by importing Deep Learning Funcs
    funcs = DeepLearnFuncs()
    
    learning_rate=0.0001
    n_epochs=1000
    batch_size=2
    dfLLdata = funcs.sgd_optimization(learning_rate, n_epochs, batch_size)
 
    ############
    ### plotting likelihood or cost
    ### the cost we minimize during training is the negative log likelihood of
    ############
    x = dfLLdata['iter'].values
    y = dfLLdata['LL_iter'].values
    plt.figure()
    plt.plot(x, y, 'bo--')
    plt.xlabel('iterations', fontsize=14)
    plt.ylabel('negative log likelihood', fontsize=14)
    plt.title('LogReg: learning_rate = '+str(learning_rate)+' batch_size = '+str(batch_size), fontsize=14)

   
    ############
    ### plotting likelihood or cost
    ############     
    x = dfLLdata['iter'].values
    y = dfLLdata['0-1-loss'].values
    plt.figure()
    plt.plot(x, y, 'bo--')
    plt.xlabel('iterations')
    plt.ylabel('0-1-loss %')
    plt.title('LogReg: learning_rate = '+str(learning_rate)+' batch_size = '+str(batch_size))

    ############
    # load the saved model 
    ############
    classifier = cPickle.load(open('best_model.pkl', 'rb'))
    
    # compile a predictor function 
    predict_model = theano.function( inputs=[classifier.input], outputs=classifier.y_pred)
    
    # We can test it on some examples from the test set
    test_set = cPickle.load(open('test_set.pkl', 'rb'))
    test_set_x, test_set_y = test_set 
    test_set_x = test_set_x.get_value() 
    
    # We can test it on some examples from the test set
    test_labels = cPickle.load(open('test_labels.pkl', 'rb'))

    predicted_values = predict_model(test_set_x[13:18])
    print("Predicted/label values for examples 13-17 in the test set:")
    print(predicted_values)
    print(test_labels[13:18])
    
    fig, ax = plt.subplots(ncols=1, nrows=1)
    img = np.reshape(test_set_x[13],(30,30))    
    ax.imshow(img, cmap="Greys_r")
    plt.show()
Example #27
File: stock.py Project: migueee/pylearn
def load_data(dataset):
    ''' Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)
    '''

    #############
    # LOAD DATA #
    #############

    print('... loading data')

    # Load the dataset
    with gzip.open(dataset, 'rb') as f:
        try:
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        except:
            train_set, valid_set, test_set = pickle.load(f)
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix)
    # where each row corresponds to an example. target is a
    # numpy.ndarray of 1 dimension (vector) that has the same length as
    # the number of rows in the input. It should give the target
    # to the example with the same index in the input.

    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch every time
        it is needed (the default behaviour if the data is not in a shared
        variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        # When storing data on the GPU it has to be stored as floats
        # therefore we will store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that). But during our computations
        # we need them as ints (we use labels as index, and if they are
        # floats it doesn't make sense) therefore instead of returning
        # ``shared_y`` we will have to cast it to int. This little hack
        # lets us get around this issue
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval
Example #28
def load_mnist(dataset, shared = False):
    """Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MINST)
    """

    ###############
    #  LOAD DATA  #
    ###############
    
    # Download the MNIST dataset if it is not present
    data_dir, data_file = os.path.split(dataset)
    if data_dir == "" and not os.path.isfile(dataset):
        # Check if dataset is in the data directory.
        new_path = os.path.join(
                os.path.split(__file__)[0],
                dataset
                )
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path

    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        from six.moves import urllib
        origin = (
                'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
                )
        print('Downloading data from %s' % origin)
        urllib.request.urlretrieve(origin, dataset)

    print('... loading mnist')

    with gzip.open(dataset, 'rb') as f:
        try:
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        except:
            train_set, valid_set, test_set = pickle.load(f)
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is numpy.ndarray of 2 dimensions (a matrix)
    # where each row corresponds to an example. target is a
    # numpy.ndarray of 1 dimension (vector) that has the same length as
    # the number of rows in the input. It should give the target 
    # to the example with the same index in the input.

    if shared:
        test_set_x, test_set_y = shared_dataset(test_set)
        valid_set_x, valid_set_y = shared_dataset(valid_set)
        train_set_x, train_set_y = shared_dataset(train_set)
    else:
        test_set_x, test_set_y = test_set
        valid_set_x, valid_set_y = valid_set
        train_set_x, train_set_y = train_set


    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]

    return rval
Example #29
def unpickle(file):
    fp = open(file, 'rb')
    if sys.version_info.major == 2:
        data = pickle.load(fp)
    elif sys.version_info.major == 3:
        data = pickle.load(fp, encoding='latin-1')
    fp.close()

    return data
Example #30
def atisfold(fold):
    assert fold in range(5)
    filename = os.path.join(PREFIX, 'atis.fold'+str(fold)+'.pkl.gz')
    f = gzip.open(filename, 'rb')
    try:
        train_set, valid_set, test_set, dicts = pickle.load(f, encoding='latin1')
    except:
        train_set, valid_set, test_set, dicts = pickle.load(f)
    return train_set, valid_set, test_set, dicts
Example #31
 def load_params(self,file_):
     for k,v in pickle.load(file_).items():
         self.params_di[k].set_value(v)
Example #32
 def load(self, path):
     with open(path, 'rb') as f:
         load_lib = cPickle.load(f)
         for attr in ['pid', 'name', 'added', 'songs', 'playlists', 'folders']:
             self.__dict__[attr] = getattr(load_lib, attr)
     self._path = path
Example #33
if __name__ == '__main__':

    setting = None
    with open('mysql_setting.yml', 'r') as f:
        setting = yaml.load(f)

    connection = pymysql.connect(host=setting['host'],
                                 user=setting['user'],
                                 password=setting['password'],
                                 db='rakuten_recipe',
                                 charset='utf8mb4',
                                 cursorclass=pymysql.cursors.SSCursor)

    category_recipe_ids = None
    with open('result/category_recipe_ids_b.out', 'rb') as f:
        category_recipe_ids = pickle.load(f)

    vocab_set = set()

    for category, recipe_ids in category_recipe_ids.items():

        for recipe_id in recipe_ids:

            with connection.cursor() as cursor:
                sql = "select title from recipes where recipe_id = {};".format(
                    recipe_id)

                cursor.execute(sql)

                result = cursor.fetchone()
Example #34
 def get_string_as_list(self,any_subgroup_string):
   from io import BytesIO
   S = BytesIO(any_subgroup_string.encode("ascii"))
   subgroups = pickle.load( S )
   return subgroups
Example #35
def unpickle(file_path):
    with open(file_path, 'rb') as cPickle_file:
        a = cPickle.load(cPickle_file)
    return a
Example #36
def handler(dn, new, old, command):
	# type: (str, dict, dict, str) -> None
	# create tmp dir
	tmpDir = os.path.dirname(tmpFile)
	listener.setuid(0)
	try:
		if not os.path.exists(tmpDir):
			os.makedirs(tmpDir)
	except Exception as exc:
		ud.debug(ud.LISTENER, ud.ERROR, "%s: could not create tmp dir %s (%s)" % (name, tmpDir, exc))
		return
	finally:
		listener.unsetuid()

	# modrdn stuff
	# 'r'+'a' -> renamed
	# command='r' and "not new and old"
	# command='a' and "new and not old"

	# write old object to pickle file
	oldObject = {}
	listener.setuid(0)
	try:
		# object was renamed -> save old object
		if command == "r" and old:
			with open(tmpFile, "wb") as fp:
				os.chmod(tmpFile, 0o600)
				pickle.dump({"dn": dn, "old": old}, fp)
		elif command == "a" and not old:
			if os.path.isfile(tmpFile):
				with open(tmpFile, "rb") as fp:
					p = pickle.load(fp)
				oldObject = p.get("old", {})
				os.remove(tmpFile)
	except Exception as exc:
		if os.path.isfile(tmpFile):
			os.remove(tmpFile)
		ud.debug(ud.LISTENER, ud.ERROR, "%s: could not read/write tmp file %s (%s)" % (name, tmpFile, exc))
	finally:
		listener.unsetuid()

	# update exports file
	lines = _read(lambda match: not match or match.group(1) != _quote(dn))

	if new and b'univentionShareNFS' in new.get('objectClass', []):
		path = new['univentionSharePath'][0].decode('UTF-8')
		options = [
			'rw' if new.get('univentionShareWriteable', [b''])[0] == b'yes' else 'ro',
			'root_squash' if new.get('univentionShareNFSRootSquash', [b''])[0] == b'yes' else 'no_root_squash',
			'async' if new.get('univentionShareNFSSync', [b''])[0] == b'async' else 'sync',
			'subtree_check' if new.get('univentionShareNFSSubTree', [b''])[0] == b'yes' else 'no_subtree_check',
		] + [cs.decode('UTF-8') for cs in new.get('univentionShareNFSCustomSetting', [])]
		lines.append('%s -%s %s # LDAP:%s' % (
			_exports_escape(path),
			_quote(','.join(options)),
			_quote(' '.join(nfs_allowed.decode('ASCII') for nfs_allowed in new.get('univentionShareNFSAllowed', [b'*']))),
			_quote(dn)
		))

		_write(lines)

		listener.setuid(0)
		try:
			# object was renamed
			if not old and oldObject and command == "a":
				old = oldObject
			ret = univention.lib.listenerSharePath.createOrRename(old, new, listener.configRegistry)
			if ret:
				ud.debug(ud.LISTENER, ud.ERROR, "%s: rename/create of sharePath for %s failed (%s)" % (name, dn, ret))
		finally:
			listener.unsetuid()
	else:
		_write(lines)
Example #37
import numpy as np
from six.moves import cPickle

filename = "train_cost.save"
f = open(filename, 'rb')
train_cost = cPickle.load(f)
f.close()

filename = "validate_cost.save"
f = open(filename, 'rb')
validate_cost = cPickle.load(f)
f.close()

filename = "best_learning_rate.save"
f = open(filename, 'rb')
best_learning_rate = cPickle.load(f)
f.close()

print(train_cost)
print(validate_cost)
print(best_learning_rate)
Example #38
def load_data(config):
    """
    Load HASYv2 dataset.

    Parameters
    ----------
    mode : string, optional (default: "complete")
        - "complete" : Returns {'x': x, 'y': y} with all labeled data
        - "fold-1": Returns {'x_train': x_train,
                             'y_train': y_train,
                             'x_test': x_test,
                             'y_test': y_test}
        - "fold-2", ..., "fold-10": See "fold-1"

    Returns
    -------
    dict
        See "mode" parameter for details
    """
    mode = 'fold-1'

    # Download if not already done
    fname = 'HASYv2.tar.bz2'
    origin = 'https://zenodo.org/record/259444/files/HASYv2.tar.bz2'
    fpath = get_file(fname,
                     origin=origin,
                     untar=False,
                     md5_hash='fddf23f36e24b5236f6b3a0880c778e3')
    path = os.path.dirname(fpath)

    # Extract content if not already done
    untar_fpath = os.path.join(path, "HASYv2")
    if not os.path.exists(untar_fpath):
        print('Extract contents from archive...')
        tfile = tarfile.open(fpath, 'r:bz2')
        try:
            tfile.extractall(path=untar_fpath)
        except (Exception, KeyboardInterrupt) as e:
            if os.path.exists(untar_fpath):
                if os.path.isfile(untar_fpath):
                    os.remove(untar_fpath)
                else:
                    shutil.rmtree(untar_fpath)
            raise
        tfile.close()

    # Create pickle if not already done
    pickle_fpath = os.path.join(untar_fpath, "hasy-data.pickle")
    if not os.path.exists(pickle_fpath):
        # Load mapping from symbol names to indices
        symbol_csv_fpath = os.path.join(untar_fpath, "symbols.csv")
        symbol_id2index, labels = _generate_index(symbol_csv_fpath)
        globals()["labels"] = labels

        # Load data
        data_csv_fpath = os.path.join(untar_fpath, "hasy-data-labels.csv")
        data_csv = _load_csv(data_csv_fpath)
        x_compl = np.zeros((len(data_csv), 1, WIDTH, HEIGHT), dtype=np.uint8)
        y_compl = []
        s_compl = []
        path2index = {}

        # Load HASYv2 data
        for i, data_item in enumerate(data_csv):
            fname = os.path.join(untar_fpath, data_item['path'])
            s_compl.append(fname)
            x_compl[i, 0, :, :] = scipy.ndimage.imread(fname,
                                                       flatten=False,
                                                       mode='L')
            label = symbol_id2index[data_item['symbol_id']]
            y_compl.append(label)
            path2index[fname] = i
        y_compl = np.array(y_compl, dtype=np.int64)

        data = {
            'x': x_compl,
            'y': y_compl,
            's': s_compl,
            'labels': labels,
            'path2index': path2index
        }

        # Store data as pickle to speed up later calls
        with open(pickle_fpath, 'wb') as f:
            pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
    else:
        with open(pickle_fpath, 'rb') as f:
            data = pickle.load(f)
        globals()["labels"] = data['labels']

    labels = data['labels']
    x_compl = data['x']
    y_compl = np.reshape(data['y'], (len(data['y']), 1))
    s_compl = data['s']
    path2index = data['path2index']

    if K.image_dim_ordering() == 'tf':
        x_compl = x_compl.transpose(0, 2, 3, 1)

    if mode == 'complete':
        return {'x': x_compl, 'y': y_compl}
    elif mode.startswith('fold-'):
        fold = int(mode.split("-")[1])
        if not (1 <= fold <= 10):
            raise NotImplementedError

        # Load fold
        fold_dir = os.path.join(untar_fpath,
                                "classification-task/fold-{}".format(fold))
        train_csv_fpath = os.path.join(fold_dir, "train.csv")
        test_csv_fpath = os.path.join(fold_dir, "test.csv")
        train_csv = _load_csv(train_csv_fpath)
        test_csv = _load_csv(test_csv_fpath)

        train_ids = np.array([path2index[row['path']] for row in train_csv])
        test_ids = np.array([path2index[row['path']] for row in test_csv])

        x_train = x_compl[train_ids]
        x_test = x_compl[test_ids]
        y_train = y_compl[train_ids]
        y_test = y_compl[test_ids]
        s_train = [s_compl[id_] for id_ in train_ids]
        s_test = [s_compl[id_] for id_ in test_ids]

        splitd = train_test_split(x_train,
                                  y_train,
                                  s_train,
                                  test_size=0.10,
                                  random_state=42,
                                  stratify=y_train)
        x_train, x_val, y_train, y_val, s_train, s_val = splitd

        data = {
            'x_train': x_train,
            'y_train': y_train,
            'x_test': x_test,
            'y_test': y_test,
            'x_val': x_val,
            'y_val': y_val,
            's_train': s_train,
            's_val': s_val,
            's_test': s_test,
            'labels': labels
        }
        return data
    else:
        raise NotImplementedError
Example #39
                                                   value=0.),
        'R':
        keras.preprocessing.sequence.pad_sequences(x['R'],
                                                   maxlen=maxTitleLen,
                                                   value=0.),
        'L':
        keras.utils.to_categorical(x['L'], num_classes=2)
    }


if __name__ == "__main__":
    path = u.getMostRecentOf("prepared-data/recommender-v1", "pkl")
    print("Loading dataset " + str(path) + "... ")

    with open(path, 'rb') as f:
        train = pickle.load(f)
        validate = pickle.load(f)
        test = pickle.load(f)
        ref_dict = pickle.load(f)
        word_mapper = pickle.load(f)

    train = prepSet(train)
    validate = prepSet(validate)
    test = prepSet(test)

    print("Train size: " + str(len(train['T'])))
    print("Test size: " + str(len(test['T'])))
    print("Validation size: " + str(len(validate['T'])))

    print(train['T'][1:5])
    print(test['T'][1:5])
Example #40
def unpickle(filename):
    with open(filename, 'rb') as fo:
        return cPickle.load(fo, encoding='latin1')
Example #41
        ori_N_total = N_total  # number of items to simulate
        mt = flex.mersenne_twister(seed=0)
        random_orientations = []
        for iteration in range(ori_N_total):
            random_orientations.append(mt.random_double_r3_rotation_matrix())

        transmitted_info = dict(
            spectra=SS,
            amplitudes=A,
            orientations=random_orientations,
        )
        with (open("confirm_P1_range_reduced_intensities_dict.pickle",
                   "rb")) as F:  # Einsle reduced
            #with (open("confirm_P1_range_oxidized_intensities_dict.pickle","rb")) as F: # Einsle oxidized
            #with (open("confirm_P1_range_metallic_intensities_dict.pickle","rb")) as F: # Einsle metallic
            intensity_dict = pickle.load(F)
            transmitted_info["intensity_dict"] = intensity_dict
        print("finished setup in rank 0")
    else:
        transmitted_info = None
    if usingMPI:
        transmitted_info = comm.bcast(transmitted_info, root=0)
        comm.barrier()
        import os
        host = os.environ["HOST"]
        print("barrier from rank %d of %d" % (rank, size), host)

    origin = col((1500, 1500))
    position0 = col((1500, 3000)) - origin
    nitem = 0
    nall_spots = 0
Example #42
  def _load_datablocks(self, obj, check_format=True, directory=None):
    ''' Create the datablock from a dictionary. '''
    from dxtbx.format.Registry import Registry
    from dxtbx.model import BeamFactory, DetectorFactory
    from dxtbx.model import GoniometerFactory, ScanFactory
    from dxtbx.serialize.filename import load_path
    from dxtbx.format.image import ImageBool, ImageDouble
    from dxtbx.format.FormatMultiImage import FormatMultiImage

    # If we have a list, extract for each dictionary in the list
    if isinstance(obj, list):
      return [self._load_datablocks(dd, check_format, directory) for dd in obj]
    elif not isinstance(obj, dict):
      raise InvalidDataBlockError("Unexpected datablock type {} instead of dict".format(type(obj)))
    # Make sure the id signature is correct
    if not obj.get("__id__") == "DataBlock":
      raise InvalidDataBlockError(
        "Expected __id__ 'DataBlock', but found {}".format(repr(obj.get("__id__"))))

    # Get the list of models
    blist = obj.get('beam', [])
    dlist = obj.get('detector', [])
    glist = obj.get('goniometer', [])
    slist = obj.get('scan', [])

    def load_models(obj):
      try:
        beam = BeamFactory.from_dict(blist[obj['beam']])
      except Exception:
        beam = None
      try:
        dobj = dlist[obj['detector']]
        detector = DetectorFactory.from_dict(dobj)
      except Exception:
        detector = None
      try:
        gonio = GoniometerFactory.from_dict(glist[obj['goniometer']])
      except Exception:
        gonio = None
      try:
        scan = ScanFactory.from_dict(slist[obj['scan']])
      except Exception:
        scan = None
      return beam, detector, gonio, scan

    # Loop through all the imagesets
    imagesets = []
    for imageset in obj['imageset']:
      ident = imageset['__id__']
      if "params" in imageset:
        format_kwargs = imageset['params']
      else:
        format_kwargs = {}
      if ident == 'ImageSweep':
        beam, detector, gonio, scan = load_models(imageset)
        if "template" in imageset:
          template = load_path(imageset['template'], directory=directory)
          i0, i1 = scan.get_image_range()
          iset = dxtbx.imageset.ImageSetFactory.make_sweep(
            template, range(i0, i1+1), None,
            beam, detector, gonio, scan, check_format,
            format_kwargs=format_kwargs)
          if 'mask' in imageset and imageset['mask'] is not None:
            imageset['mask'] = load_path(imageset['mask'], directory=directory)
            iset.external_lookup.mask.filename = imageset['mask']
            if check_format:
              with open(imageset['mask']) as infile:
                iset.external_lookup.mask.data = ImageBool(pickle.load(infile))
          if 'gain' in imageset and imageset['gain'] is not None:
            imageset['gain'] = load_path(imageset['gain'], directory=directory)
            iset.external_lookup.gain.filename = imageset['gain']
            if check_format:
              with open(imageset['gain']) as infile:
                iset.external_lookup.gain.data = ImageDouble(pickle.load(infile))
          if 'pedestal' in imageset and imageset['pedestal'] is not None:
            imageset['pedestal'] = load_path(imageset['pedestal'], directory=directory)
            iset.external_lookup.pedestal.filename = imageset['pedestal']
            if check_format:
              with open(imageset['pedestal']) as infile:
                iset.external_lookup.pedestal.data = ImageDouble(pickle.load(infile))
          if 'dx' in imageset and imageset['dx'] is not None:
            imageset['dx'] = load_path(imageset['dx'], directory=directory)
            iset.external_lookup.dx.filename = imageset['dx']
            with open(imageset['dx']) as infile:
              iset.external_lookup.dx.data = ImageDouble(pickle.load(infile))
          if 'dy' in imageset and imageset['dy'] is not None:
            imageset['dy'] = load_path(imageset['dy'], directory=directory)
            iset.external_lookup.dy.filename = imageset['dy']
            with open(imageset['dy']) as infile:
              iset.external_lookup.dy.data = ImageDouble(pickle.load(infile))
          iset.update_detector_px_mm_data()
        elif "master" in imageset:
          template = load_path(imageset['master'], directory=directory)
          i0, i1 = scan.get_image_range()
          indices = imageset['images']
          if check_format == False:
            format_class = FormatMultiImage
          else:
            format_class = None
          iset = dxtbx.imageset.ImageSetFactory.make_sweep(
            template,
            list(range(i0, i1+1)),
            format_class  = format_class,
            beam          = beam,
            detector      = detector,
            goniometer    = gonio,
            scan          = scan,
            check_format  = check_format,
            format_kwargs = format_kwargs)
          if 'mask' in imageset and imageset['mask'] is not None:
            imageset['mask'] = load_path(imageset['mask'], directory)
            iset.external_lookup.mask.filename = imageset['mask']
            if check_format:
              with open(imageset['mask']) as infile:
                iset.external_lookup.mask.data = ImageBool(pickle.load(infile))
          if 'gain' in imageset and imageset['gain'] is not None:
            imageset['gain'] = load_path(imageset['gain'], directory)
            iset.external_lookup.gain.filename = imageset['gain']
            if check_format:
              with open(imageset['gain']) as infile:
                iset.external_lookup.gain.data = ImageDouble(pickle.load(infile))
          if 'pedestal' in imageset and imageset['pedestal'] is not None:
            imageset['pedestal'] = load_path(imageset['pedestal'], directory)
            iset.external_lookup.pedestal.filename = imageset['pedestal']
            if check_format:
              with open(imageset['pedestal']) as infile:
                iset.external_lookup.pedestal.data = ImageDouble(pickle.load(infile))
          if 'dx' in imageset and imageset['dx'] is not None:
            imageset['dx'] = load_path(imageset['dx'], directory)
            iset.external_lookup.dx.filename = imageset['dx']
            with open(imageset['dx']) as infile:
              iset.external_lookup.dx.data = ImageDouble(pickle.load(infile))
          if 'dy' in imageset and imageset['dy'] is not None:
            imageset['dy'] = load_path(imageset['dy'], directory)
            iset.external_lookup.dy.filename = imageset['dy']
            with open(imageset['dy']) as infile:
              iset.external_lookup.dy.data = ImageDouble(pickle.load(infile))
          iset.update_detector_px_mm_data()
        imagesets.append(iset)
      elif ident == 'ImageSet' or ident == "ImageGrid":
        filenames = [image['filename'] for image in imageset['images']]
        indices = [image['image'] for image in imageset['images'] if 'image' in image]
        assert len(indices) == 0 or len(indices) == len(filenames)
        iset = dxtbx.imageset.ImageSetFactory.make_imageset(
          filenames, None, check_format, indices, format_kwargs=format_kwargs)
        if ident == "ImageGrid":
          grid_size = imageset['grid_size']
          iset = dxtbx.imageset.ImageGrid.from_imageset(iset, grid_size)
        for i, image in enumerate(imageset['images']):
          beam, detector, gonio, scan = load_models(image)
          iset.set_beam(beam, i)
          iset.set_detector(detector, i)
          iset.set_goniometer(gonio, i)
          iset.set_scan(scan, i)
        if 'mask' in imageset and imageset['mask'] is not None:
          imageset['mask'] = load_path(imageset['mask'], directory)
          iset.external_lookup.mask.filename = imageset['mask']
          if check_format:
            with open(imageset['mask']) as infile:
              iset.external_lookup.mask.data = ImageBool(pickle.load(infile))
        if 'gain' in imageset and imageset['gain'] is not None:
          imageset['gain'] = load_path(imageset['gain'], directory)
          iset.external_lookup.gain.filename = imageset['gain']
          if check_format:
            with open(imageset['gain']) as infile:
              iset.external_lookup.gain.data = ImageDouble(pickle.load(infile))
        if 'pedestal' in imageset and imageset['pedestal'] is not None:
          imageset['pedestal'] = load_path(imageset['pedestal'], directory)
          iset.external_lookup.pedestal.filename = imageset['pedestal']
          if check_format:
            with open(imageset['pedestal']) as infile:
              iset.external_lookup.pedestal.data = ImageDouble(pickle.load(infile))
        if 'dx' in imageset and imageset['dx'] is not None:
          imageset['dx'] = load_path(imageset['dx'], directory)
          iset.external_lookup.dx.filename = imageset['dx']
          with open(imageset['dx']) as infile:
            iset.external_lookup.dx.data = ImageDouble(pickle.load(infile))
        if 'dy' in imageset and imageset['dy'] is not None:
          imageset['dy'] = load_path(imageset['dy'], directory)
          iset.external_lookup.dy.filename = imageset['dy']
          with open(imageset['dy']) as infile:
            iset.external_lookup.dy.data = ImageDouble(pickle.load(infile))
          iset.update_detector_px_mm_data()
        imagesets.append(iset)
      else:
        raise RuntimeError('expected ImageSet/ImageSweep, got %s' % ident)

    # Return the datablock
    return DataBlock(imagesets)
Example #43
def process_in_parallel(tag,
                        total_range_size,
                        binary,
                        output_dir,
                        load_ckpt,
                        load_detectron,
                        opts=''):
    """Run the specified binary NUM_GPUS times in parallel, each time as a
    subprocess that uses one GPU. The binary must accept the command line
    arguments `--range {start} {end}` that specify a data processing range.
    """
    # Snapshot the current cfg state in order to pass to the inference
    # subprocesses
    cfg_file = os.path.join(output_dir, '{}_range_config.yaml'.format(tag))
    with open(cfg_file, 'w') as f:
        yaml.dump(cfg, stream=f)
    subprocess_env = os.environ.copy()
    processes = []
    NUM_GPUS = torch.cuda.device_count()
    subinds = np.array_split(range(total_range_size), NUM_GPUS)
    # Determine GPUs to use
    cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES')
    if cuda_visible_devices:
        gpu_inds = list(map(int, cuda_visible_devices.split(',')))
        assert -1 not in gpu_inds, \
            'Hiding GPU indices using the \'-1\' index is not supported'
    else:
        gpu_inds = range(cfg.NUM_GPUS)
    gpu_inds = list(gpu_inds)
    # Run the binary in cfg.NUM_GPUS subprocesses
    for i, gpu_ind in enumerate(gpu_inds):
        start = subinds[i][0]
        end = subinds[i][-1] + 1
        subprocess_env['CUDA_VISIBLE_DEVICES'] = str(gpu_ind)
        cmd = (
            'python3 {binary} --range {start} {end} --cfg {cfg_file} --set {opts} '
            '--output_dir {output_dir}')
        if load_ckpt is not None:
            cmd += ' --load_ckpt {load_ckpt}'
        elif load_detectron is not None:
            cmd += ' --load_detectron {load_detectron}'
        cmd = cmd.format(binary=shlex_quote(binary),
                         start=int(start),
                         end=int(end),
                         cfg_file=shlex_quote(cfg_file),
                         output_dir=output_dir,
                         load_ckpt=load_ckpt,
                         load_detectron=load_detectron,
                         opts=' '.join([shlex_quote(opt) for opt in opts]))
        logger.info('{} range command {}: {}'.format(tag, i, cmd))
        if i == 0:
            subprocess_stdout = subprocess.PIPE
        else:
            filename = os.path.join(
                output_dir, '%s_range_%s_%s.stdout' % (tag, start, end))
            subprocess_stdout = open(filename, 'w')
        p = subprocess.Popen(cmd,
                             shell=True,
                             env=subprocess_env,
                             stdout=subprocess_stdout,
                             stderr=subprocess.STDOUT,
                             bufsize=1)
        processes.append((i, p, start, end, subprocess_stdout))
    # Log output from inference processes and collate their results
    outputs = []
    for i, p, start, end, subprocess_stdout in processes:
        log_subprocess_output(i, p, output_dir, tag, start, end)
        if isinstance(subprocess_stdout, IOBase):
            subprocess_stdout.close()
        range_file = os.path.join(output_dir,
                                  '%s_range_%s_%s.pkl' % (tag, start, end))
        range_data = pickle.load(open(range_file, 'rb'))
        outputs.append(range_data)
    return outputs
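For illustration, here is a minimal, self-contained sketch of the range-splitting logic described in the docstring above: np.array_split divides the full index range into one contiguous chunk per GPU, and each chunk's first and last index become the --range start end arguments of a per-GPU command. The binary path and GPU count below are placeholders, not values from the original code.

import numpy as np
from shlex import quote as shlex_quote  # stand-in for the shlex_quote helper used above


def build_range_commands(total_range_size, num_gpus, binary='tools/test_net.py'):
    """Sketch: split [0, total_range_size) into num_gpus chunks and build one
    command line per chunk, mirroring the loop in process_in_parallel()."""
    commands = []
    subinds = np.array_split(range(total_range_size), num_gpus)
    for gpu_ind, inds in enumerate(subinds):
        start, end = int(inds[0]), int(inds[-1]) + 1
        cmd = 'python3 {} --range {} {}'.format(shlex_quote(binary), start, end)
        commands.append((gpu_ind, cmd))
    return commands


# Example: 10 items over 3 GPUs -> ranges [0, 4), [4, 7), [7, 10)
for gpu_ind, cmd in build_range_commands(10, 3):
    print(gpu_ind, cmd)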
Example #44
0
        self.input_label_h5 = ''
        self.input_json = ''
        self.split = 'test'

        self.coco_json = ''
        self.id = '' 
        self.pretrained_weight = 1  # xud

# In[3]:

os.environ['CUDA_VISIBLE_DEVICES'] = '1'
opt = Opt() 
opt.infos_path = './log_st/infos_-best_attention_wehao.pkl'
# Load infos
with open(opt.infos_path, 'rb') as f:
    infos = cPickle.load(f) 


# In[4]:

# override and collect parameters
if len(opt.input_fc_dir) == 0:
    opt.input_fc_h5 = infos['opt'].input_fc_h5
    opt.input_att_h5 = infos['opt'].input_att_h5
    opt.input_label_h5 = infos['opt'].input_label_h5

if len(opt.input_json) == 0:
    opt.input_json = infos['opt'].input_json
if opt.batch_size == 0:
    opt.batch_size = infos['opt'].batch_size
if len(opt.id) == 0:
Example #45
0
 def __init__(self,datadir,work_params,plot=False,esd_plot=False,half_data_flag=0):
  casetag = work_params.output.prefix
  # read the ground truth values back in
  from six.moves import cPickle as pickle
  # it is assumed (for now) that the reference millers contain a complete asymmetric unit
  # of indices, within the (d_max,d_min) region of interest and possibly outside the region.
  reference_millers = pickle.load(open(os.path.join(datadir,casetag+"_miller.pickle"),"rb"))
  experiment_manager = read_experiments(work_params)

  obs = pickle.load(open(os.path.join(datadir,casetag+"_observation.pickle"),"rb"))
  print("Read in %d observations"%(len(obs["observed_intensity"])))
  reference_millers.show_summary(prefix="Miller index file ")

  print(len(obs["frame_lookup"]),len(obs["observed_intensity"]), flex.max(obs['miller_lookup']),flex.max(obs['frame_lookup']))
  max_frameno = flex.max(obs["frame_lookup"])

  from iotbx import mtz
  mtz_object = mtz.object(file_name=work_params.scaling.mtz_file)
  #for array in mtz_object.as_miller_arrays():
  #  this_label = array.info().label_string()
  #  print this_label, array.observation_type()
  I_sim = mtz_object.as_miller_arrays()[0].as_intensity_array()
  I_sim.show_summary()
  MODEL_REINDEX_OP = work_params.model_reindex_op
  I_sim = I_sim.change_basis(MODEL_REINDEX_OP).map_to_asu()

  #match up isomorphous (the simulated fake F's) with experimental unique set
  matches = miller.match_multi_indices(
      miller_indices_unique=reference_millers.indices(),
      miller_indices=I_sim.indices())

  print("original unique",len(reference_millers.indices()))
  print("isomorphous set",len(I_sim.indices()))
  print("pairs",len(matches.pairs()))
  iso_data = flex.double(len(reference_millers.indices()))

  for pair in matches.pairs():
    iso_data[pair[0]] = I_sim.data()[pair[1]]

  reference_data = miller.array(miller_set = reference_millers,
                                data = iso_data)
  reference_data.set_observation_type_xray_intensity()

  FOBS = prepare_observations_for_scaling(work_params,obs=obs,
                                          reference_intensities=reference_data,
                                          files = experiment_manager.get_files(),
                                          half_data_flag=half_data_flag)

  I,I_visited,G,G_visited = I_and_G_base_estimate(FOBS,params=work_params)
  print("I length",len(I), "G length",len(G), "(Reference set; entire asymmetric unit)")
  assert len(reference_data.data()) == len(I)

  #presumably these assertions fail when half data are taken for CC1/2 or d_min is cut
  model_I = reference_data.data()[0:len(I)]

  T = Timer("%d frames"%(len(G), ))

  mapper = mapper_factory(xscale6e)
  minimizer = mapper(I,G,I_visited,G_visited,FOBS,params=work_params,
                     experiments=experiment_manager.get_experiments())

  del T
  minimizer.show_summary()

  Fit = minimizer.e_unpack()
  Gstats=flex.mean_and_variance(Fit["G"].select(G_visited==1))
  print("G mean and standard deviation:",Gstats.mean(),Gstats.unweighted_sample_standard_deviation())
  if "Bfactor" in work_params.levmar.parameter_flags:
    Bstats=flex.mean_and_variance(Fit["B"].select(G_visited==1))
    print("B mean and standard deviation:",Bstats.mean(),Bstats.unweighted_sample_standard_deviation())
  show_correlation(Fit["I"],model_I,I_visited,"Correlation of I:")
  Fit_stddev = minimizer.e_unpack_stddev()

  # XXX FIXME known bug:  the length of Fit["G"] could be smaller than the length of experiment_manager.get_files()
  # Not sure if this has any operational drawbacks.  It's a result of half-dataset selection.

  if plot:
    plot_it(Fit["I"], model_I, mode="I")
    if "Rxy" in work_params.levmar.parameter_flags:
      show_histogram(Fit["Ax"],"Histogram of x rotation (degrees)")
      show_histogram(Fit["Ay"],"Histogram of y rotation (degrees)")
  print()

  if esd_plot:
    minimizer.esd_plot()

  from cctbx.examples.merging.show_results import show_overall_observations
  table1,self.n_bins,self.d_min = show_overall_observations(
           Fit["I"],Fit_stddev["I"],I_visited,
           reference_data,FOBS,title="Statistics for all reflections",
           work_params = work_params)

  self.FSIM=FOBS
  self.ordered_intensities=reference_data
  self.reference_millers=reference_millers
  self.Fit_I=Fit["I"]
  self.Fit_I_stddev=Fit_stddev["I"]
  self.I_visited=I_visited
  self.Fit = Fit
  self.experiments = experiment_manager
def load_pkl(file_name):
  pkl = open(file_name, "rb")
  data = pickle.load(pkl)
  pkl.close()
  return data
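The match-and-scatter step in the constructor above (miller.match_multi_indices followed by the loop over matches.pairs()) boils down to pairing equal indices from two lists and copying data from the second set into an array ordered like the first. A plain-Python analogue, purely illustrative and not using cctbx:

import numpy as np

# Pair up equal (h, k, l) indices from two lists, then copy data from the
# second set into an array ordered like the first, leaving unmatched slots at 0.
reference_indices = [(1, 0, 0), (0, 1, 0), (0, 0, 1), (1, 1, 0)]
sim_indices = [(0, 0, 1), (1, 0, 0), (2, 2, 2)]
sim_data = np.array([5.0, 7.0, 9.0])

lookup = {hkl: i for i, hkl in enumerate(sim_indices)}
pairs = [(i, lookup[hkl]) for i, hkl in enumerate(reference_indices) if hkl in lookup]

iso_data = np.zeros(len(reference_indices))
for ref_i, sim_i in pairs:
    iso_data[ref_i] = sim_data[sim_i]

print(pairs)     # [(0, 1), (2, 0)]
print(iso_data)  # [7. 0. 5. 0.]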
Example #47
0
                pred_y = np.argmax(logit_outputs, axis=1)
                true_y = np.argmax(current_batch_labels_test, axis=1)
                true_cnt += sum(pred_y == true_y)
                test_batch_cnt += 1

            test_acc = true_cnt / float(test_batch_cnt * opt.batch_size)
            print("epoch {}  test_acc {:.4f}  test_num: {}".format(
                epoch, test_acc, test_batch_cnt * opt.batch_size))


if __name__ == '__main__':
    opt = opts.parse_opt()
    opt_dict = vars(opt)
    for k, v in opt_dict.items():
        print(k + ': \t' + str(v))

    with open('permuted_mnist_110.pkl', 'rb') as f:
        permuted_mnist = cPickle.load(f)

    if os.path.isdir(opt.rcst_model_save_path) is False:
        os.mkdir(opt.rcst_model_save_path)

    x_train_permuted = permuted_mnist['x_train_permuted']
    y_train = permuted_mnist['y_train']

    x_test_permuted = permuted_mnist['x_test_permuted']
    y_test = permuted_mnist['y_test']

    # training
    train(opt, x_train_permuted, y_train, x_test_permuted, y_test)
Example #48
0
    def test_run_once(self, mock_check_drive):
        mock_check_drive.side_effect = lambda r, d, mc: os.path.join(r, d)
        ou = object_updater.ObjectUpdater(
            {
                'devices': self.devices_dir,
                'mount_check': 'false',
                'swift_dir': self.testdir,
                'interval': '1',
                'concurrency': '1',
                'node_timeout': '15'
            },
            logger=self.logger)
        ou.run_once()
        async_dir = os.path.join(self.sda1, get_async_dir(POLICIES[0]))
        os.mkdir(async_dir)
        ou.run_once()
        self.assertTrue(os.path.exists(async_dir))
        # each run calls check_device
        self.assertEqual([
            mock.call(self.devices_dir, 'sda1', False),
            mock.call(self.devices_dir, 'sda1', False),
        ], mock_check_drive.mock_calls)
        mock_check_drive.reset_mock()

        ou = object_updater.ObjectUpdater(
            {
                'devices': self.devices_dir,
                'mount_check': 'TrUe',
                'swift_dir': self.testdir,
                'interval': '1',
                'concurrency': '1',
                'node_timeout': '15'
            },
            logger=self.logger)
        odd_dir = os.path.join(async_dir, 'not really supposed ' 'to be here')
        os.mkdir(odd_dir)
        ou.run_once()
        self.assertTrue(os.path.exists(async_dir))
        self.assertEqual([
            mock.call(self.devices_dir, 'sda1', True),
        ], mock_check_drive.mock_calls)

        ohash = hash_path('a', 'c', 'o')
        odir = os.path.join(async_dir, ohash[-3:])
        mkdirs(odir)
        older_op_path = os.path.join(
            odir, '%s-%s' % (ohash, normalize_timestamp(time() - 1)))
        op_path = os.path.join(odir,
                               '%s-%s' % (ohash, normalize_timestamp(time())))
        for path in (op_path, older_op_path):
            with open(path, 'wb') as async_pending:
                pickle.dump(
                    {
                        'op': 'PUT',
                        'account': 'a',
                        'container': 'c',
                        'obj': 'o',
                        'headers': {
                            'X-Container-Timestamp': normalize_timestamp(0)
                        }
                    }, async_pending)
        ou.run_once()
        self.assertTrue(not os.path.exists(older_op_path))
        self.assertTrue(os.path.exists(op_path))
        self.assertEqual(ou.logger.get_increment_counts(), {
            'failures': 1,
            'unlinks': 1
        })
        self.assertIsNone(pickle.load(open(op_path, 'rb')).get('successes'))

        bindsock = listen_zero()

        def accepter(sock, return_code):
            try:
                with Timeout(3):
                    inc = sock.makefile('rb')
                    out = sock.makefile('wb')
                    out.write(b'HTTP/1.1 %d OK\r\nContent-Length: 0\r\n\r\n' %
                              return_code)
                    out.flush()
                    self.assertEqual(inc.readline(),
                                     b'PUT /sda1/0/a/c/o HTTP/1.1\r\n')
                    headers = HeaderKeyDict()
                    line = inc.readline()
                    while line and line != b'\r\n':
                        headers[line.split(b':')[0]] = \
                            line.split(b':')[1].strip()
                        line = inc.readline()
                    self.assertIn(b'x-container-timestamp', headers)
                    self.assertIn(b'X-Backend-Storage-Policy-Index', headers)
            except BaseException as err:
                return err
            return None

        def accept(return_codes):
            try:
                events = []
                for code in return_codes:
                    with Timeout(3):
                        sock, addr = bindsock.accept()
                        events.append(spawn(accepter, sock, code))
                for event in events:
                    err = event.wait()
                    if err:
                        raise err
            except BaseException as err:
                return err
            return None

        event = spawn(accept, [201, 500, 500])
        for dev in ou.get_container_ring().devs:
            if dev is not None:
                dev['port'] = bindsock.getsockname()[1]

        ou.logger._clear()
        ou.run_once()
        err = event.wait()
        if err:
            raise err
        self.assertTrue(os.path.exists(op_path))
        self.assertEqual(ou.logger.get_increment_counts(), {'failures': 1})
        self.assertEqual([0],
                         pickle.load(open(op_path, 'rb')).get('successes'))

        event = spawn(accept, [404, 201])
        ou.logger._clear()
        ou.run_once()
        err = event.wait()
        if err:
            raise err
        self.assertTrue(os.path.exists(op_path))
        self.assertEqual(ou.logger.get_increment_counts(), {'failures': 1})
        self.assertEqual([0, 2],
                         pickle.load(open(op_path, 'rb')).get('successes'))

        event = spawn(accept, [201])
        ou.logger._clear()
        ou.run_once()
        err = event.wait()
        if err:
            raise err

        # we remove the async_pending and its containing suffix dir, but not
        # anything above that
        self.assertFalse(os.path.exists(op_path))
        self.assertFalse(os.path.exists(os.path.dirname(op_path)))
        self.assertTrue(
            os.path.exists(os.path.dirname(os.path.dirname(op_path))))
        self.assertEqual(ou.logger.get_increment_counts(), {
            'unlinks': 1,
            'successes': 1
        })
Example #49
0
def load_data(dataset):
    ''' Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)
    '''

    #############
    # LOAD DATA #
    #############

    # Download the MNIST dataset if it is not present
    data_dir, data_file = os.path.split(dataset)
    if data_dir == "" and not os.path.isfile(dataset):
        # Check if dataset is in the data directory.
        new_path = os.path.join(
            os.path.split(__file__)[0], "..", "data", dataset)
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path

    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        from six.moves import urllib
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz')
        print('Downloading data from %s' % origin)
        urllib.request.urlretrieve(origin, dataset)

    print('... loading data')

    # Load the dataset
    with gzip.open(dataset, 'rb') as f:
        try:
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        except:
            train_set, valid_set, test_set = pickle.load(f)
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix)
    # where each row corresponds to an example. target is a
    # numpy.ndarray of 1 dimension (vector) that has the same length as
    # the number of rows in the input. It should give the target
    # to the example with the same index in the input.

    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch every time
        is needed (the default behaviour if the data is not in a shared
        variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        # When storing data on the GPU it has to be stored as floats
        # therefore we will store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that). But during our computations
        # we need them as ints (we use labels as index, and if they are
        # floats it doesn't make sense) therefore instead of returning
        # ``shared_y`` we will have to cast it to int. This little hack
        # lets us get around this issue
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval
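A minimal usage sketch of the shared-variable layout described in the docstring above: slicing the shared tensors by a symbolic index pulls out one minibatch without re-copying the whole dataset to the GPU. This assumes Theano is installed and that load_data can find (or download) mnist.pkl.gz; the batch size is arbitrary.

import theano
import theano.tensor as T

datasets = load_data('mnist.pkl.gz')
(train_set_x, train_set_y), _, _ = datasets

batch_size = 500
index = T.lscalar('index')  # symbolic minibatch index

# Compile once; each call slices the shared variables on the device.
get_batch = theano.function(
    inputs=[index],
    outputs=[train_set_x[index * batch_size:(index + 1) * batch_size],
             train_set_y[index * batch_size:(index + 1) * batch_size]])

x0, y0 = get_batch(0)
print(x0.shape, y0.shape)  # e.g. (500, 784) (500,)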
Example #50
0
import sys
import math
import time
import pickle  # needed for pickle.load below

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

DATA_PATH = 'medical_data/'
DATA_FILE = DATA_PATH + 'medical_data.pickle'
INCLUDE_TEST_SET = False


print("Loading datasets...")
with open(DATA_FILE, 'rb') as f:
  save = pickle.load(f)
  train_X = save['train_data']
  train_Y = save['train_labels']
  val_X = save['val_data']
  val_Y = save['val_labels']

  if INCLUDE_TEST_SET:
    test_X = save['test_data']
    test_Y = save['test_labels']
  del save  # hint to help gc free up memory

print('Training set', train_X.shape, train_Y.shape)
print('Validation set', val_X.shape, val_Y.shape)
if INCLUDE_TEST_SET:
  print('Test set', test_X.shape, test_Y.shape)
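The loader above assumes DATA_FILE already exists and contains the keys it reads. A sketch that writes a structurally compatible pickle filled with random placeholder arrays, useful only for smoke-testing the loading code (the shapes and class count are assumptions):

import os
import pickle
import numpy as np

os.makedirs('medical_data', exist_ok=True)
dummy = {
    'train_data': np.random.rand(100, 32, 32, 1).astype(np.float32),
    'train_labels': np.eye(2)[np.random.randint(0, 2, size=100)],
    'val_data': np.random.rand(20, 32, 32, 1).astype(np.float32),
    'val_labels': np.eye(2)[np.random.randint(0, 2, size=20)],
    'test_data': np.random.rand(20, 32, 32, 1).astype(np.float32),
    'test_labels': np.eye(2)[np.random.randint(0, 2, size=20)],
}
with open('medical_data/medical_data.pickle', 'wb') as f:
    pickle.dump(dummy, f, pickle.HIGHEST_PROTOCOL)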
Example #51
0
def load_dict(filename_):
    with open(filename_, 'rb') as f:
        ret_di = pickle.load(f)
    return ret_di
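A complementary save helper (the name save_dict is hypothetical, not part of the original) that writes with the highest pickle protocol so load_dict above can read it back:

import pickle


def save_dict(di_, filename_):
    # Counterpart to load_dict(): serialize a dict to disk in binary mode.
    with open(filename_, 'wb') as f:
        pickle.dump(di_, f, protocol=pickle.HIGHEST_PROTOCOL)


# Round-trip check using the load_dict defined above.
save_dict({'answer': 42}, 'demo_dict.pkl')
assert load_dict('demo_dict.pkl') == {'answer': 42}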
Example #52
0
def make_invariance_sets():
    print "\nMaking invariance datasets..."
    with open(DATA_PATH + 'art_data.pickle', 'rb') as f:
        save = pickle.load(f)
        val_X = save['val_data']
        val_Y = save['val_labels']
        del save  # hint to help gc free up memory

    n = len(val_X)
    translated_val_X = np.ndarray((n, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS),
                                  dtype=np.float32)
    flipped_val_X = np.ndarray((n, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS),
                               dtype=np.float32)
    inverted_val_X = np.ndarray((n, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS),
                                dtype=np.float32)
    dark_val_X = np.ndarray((n, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS),
                            dtype=np.float32)
    bright_val_X = np.ndarray((n, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS),
                              dtype=np.float32)
    high_contrast_val_X = np.ndarray((n, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS),
                                     dtype=np.float32)
    low_contrast_val_X = np.ndarray((n, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS),
                                    dtype=np.float32)

    print "\tFlipping and inverting images..."
    val_X_RGB = (val_X * PIXEL_DEPTH) + PIXEL_DEPTH / 2.0
    for i in range(n):
        npimg = val_X_RGB[i, :, :, :]
        img = Image.fromarray(np.uint8(npimg))

        translated_val_X[i, :, :, :] = translate_img(npimg)
        flipped_val_X[i, :, :, :] = np.array(img.rotate(180))
        inverted_val_X[i, :, :, :] = np.array(PIL.ImageOps.invert(img))

        bright_mod = ImageEnhance.Brightness(img)
        dark_val_X[i, :, :, :] = np.array(bright_mod.enhance(0.75))
        bright_val_X[i, :, :, :] = np.array(bright_mod.enhance(1.5))

        contrast_mod = ImageEnhance.Contrast(img)
        low_contrast_val_X[i, :, :, :] = np.array(contrast_mod.enhance(0.75))
        high_contrast_val_X[i, :, :, :] = np.array(contrast_mod.enhance(1.5))

    print "\tScaling pixel values..."
    translated_val_X = scale_pixel_values(translated_val_X)
    flipped_val_X = scale_pixel_values(flipped_val_X)
    inverted_val_X = scale_pixel_values(inverted_val_X)
    dark_val_X = scale_pixel_values(dark_val_X)
    bright_val_X = scale_pixel_values(bright_val_X)
    high_contrast_val_X = scale_pixel_values(high_contrast_val_X)
    low_contrast_val_X = scale_pixel_values(low_contrast_val_X)

    print "\tPickling file..."
    save = {
        'translated_val_data': translated_val_X,
        'flipped_val_data': flipped_val_X,
        'inverted_val_data': inverted_val_X,
        'bright_val_data': bright_val_X,
        'dark_val_data': dark_val_X,
        'high_contrast_val_data': high_contrast_val_X,
        'low_contrast_val_data': low_contrast_val_X,
    }
    save_pickle_file('invariance_art_data.pickle', save)
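translate_img and scale_pixel_values are defined elsewhere in that project and are not shown here. Given that the code above recovers raw RGB values with val_X * PIXEL_DEPTH + PIXEL_DEPTH / 2.0, a plausible inverse for scale_pixel_values would be the following; treat it as an assumption, not the original implementation:

import numpy as np

PIXEL_DEPTH = 255.0  # assumed constant, matching common 8-bit image pipelines


def scale_pixel_values(images):
    # Map raw 0..255 pixel values back to roughly [-0.5, 0.5], i.e. the inverse
    # of the (x * PIXEL_DEPTH) + PIXEL_DEPTH / 2.0 transform used above.
    return (np.asarray(images, dtype=np.float32) - PIXEL_DEPTH / 2.0) / PIXEL_DEPTH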
Example #53
0
def read_data(data_dir, force=False):
    def create_onehot_label(x):
        label = np.zeros((1, NUM_LABELS), dtype=np.float32)
        label[:, int(x)] = 1
        return label

    pickle_file = os.path.join(data_dir, "EmotionDetectorData.pickle")
    if force or not os.path.exists(pickle_file):
        train_filename = os.path.join(data_dir, "train.csv")
        data_frame = pd.read_csv(train_filename)
        data_frame['Pixels'] = data_frame['Pixels'].apply(
            lambda x: np.fromstring(x, sep=" ") / 255.0)
        data_frame = data_frame.dropna()
        print("Reading train.csv ...")

        train_images = np.vstack(data_frame['Pixels']).reshape(
            -1, IMAGE_SIZE, IMAGE_SIZE, 1)
        print(train_images.shape)
        train_labels = np.array(
            [list(map(create_onehot_label,
                      data_frame['Emotion'].values))]).reshape(-1, NUM_LABELS)
        print(train_labels.shape)

        permutations = np.random.permutation(train_images.shape[0])
        train_images = train_images[permutations]
        train_labels = train_labels[permutations]
        validation_percent = int(train_images.shape[0] * VALIDATION_PERCENT)
        validation_images = train_images[:validation_percent]
        validation_labels = train_labels[:validation_percent]
        train_images = train_images[validation_percent:]
        train_labels = train_labels[validation_percent:]

        print("Reading test.csv ...")
        test_filename = os.path.join(data_dir, "test.csv")
        data_frame = pd.read_csv(test_filename)
        data_frame['Pixels'] = data_frame['Pixels'].apply(
            lambda x: np.fromstring(x, sep=" ") / 255.0)
        data_frame = data_frame.dropna()
        test_images = np.vstack(data_frame['Pixels']).reshape(
            -1, IMAGE_SIZE, IMAGE_SIZE, 1)

        with open(pickle_file, "wb") as file:
            try:
                print('Pickling ...')
                save = {
                    "train_images": train_images,
                    "train_labels": train_labels,
                    "validation_images": validation_images,
                    "validation_labels": validation_labels,
                    "test_images": test_images,
                }
                pickle.dump(save, file, pickle.HIGHEST_PROTOCOL)

            except:
                print("Unable to pickle file :/")

    with open(pickle_file, "rb") as file:
        save = pickle.load(file)
        train_images = save["train_images"]
        train_labels = save["train_labels"]
        validation_images = save["validation_images"]
        validation_labels = save["validation_labels"]
        test_images = save["test_images"]

    return train_images, train_labels, validation_images, validation_labels, test_images
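read_data above follows a common caching pattern: build the arrays and pickle them only when the cache file is missing (or force=True), then always reload from the pickle so both code paths return identical objects. Distilled into a minimal, dataset-agnostic sketch (function and file names here are illustrative):

import os
import pickle


def cached_compute(cache_path, compute_fn, force=False):
    # Recompute and re-pickle only if needed; always return the unpickled copy.
    if force or not os.path.exists(cache_path):
        with open(cache_path, 'wb') as f:
            pickle.dump(compute_fn(), f, pickle.HIGHEST_PROTOCOL)
    with open(cache_path, 'rb') as f:
        return pickle.load(f)


# Example: an expensive computation cached under an illustrative filename.
data = cached_compute('squares.pickle', lambda: {n: n * n for n in range(10)})
print(data[7])  # 49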
Example #54
0
    tf_summary_writer = tf and tf.summary.FileWriter(opt.checkpoint_path)
    infos = {}
    histories = {}
    if opt.start_from is not None:
        if opt.load_best_score == 1:
            model_path = os.path.join(opt.start_from, 'model-best.pth')
            info_path = os.path.join(opt.start_from,
                                     'infos_' + opt.id + '-best.pkl')
        else:
            model_path = os.path.join(opt.start_from, 'model.pth')
            info_path = os.path.join(opt.start_from,
                                     'infos_' + opt.id + '.pkl')

            # open old infos and check if models are compatible
        with open(info_path, 'rb') as f:
            infos = cPickle.load(f)
            saved_model_opt = infos['opt']

        # opt.learning_rate = saved_model_opt.learning_rate
        print('Loading the model %s...' % (model_path))
        model.load_state_dict(torch.load(model_path))

        if os.path.isfile(
                os.path.join(opt.start_from, 'histories_' + opt.id + '.pkl')):
            with open(
                    os.path.join(opt.start_from,
                                 'histories_' + opt.id + '.pkl'), 'rb') as f:
                histories = cPickle.load(f)

    if opt.decode_noc:
        model._reinit_word_weight(opt, dataset.ctoi, dataset.wtoi)
Example #55
0
inferred_labels = model.fit(normalized_flux[validate_set], normalized_ivar[validate_set])
inferred_labels = np.vstack(inferred_labels).T

fig, ax = plt.subplots(3)
for i, label_name in enumerate(model.vectorizer.label_names):
    ax[i].scatter(labelled_set[label_name][validate_set], inferred_labels[:, i])


raise a


"""

# Fit individual spectra using two different models.
with open("apogee-rg-individual-visit-normalized.pickle", "rb") as fp:
    individual_visit_spectra = pickle.load(fp, encoding="latin-1")

latex_labels = {
    "TEFF": "T_{\\rm eff}",
    "LOGG": "\log{g}",
    "FE_H": "{\\rm [Fe/H]}"
}

models_to_compare = {
    #"model1": "gridsearch-2.0-3.0.model",
    "model2": "gridsearch-2.0-3.0-s2-heuristically-set.model"
}

for model_name, saved_filename in models_to_compare.items():

    scale_factor = saved_filename
def train(opt):

    # Load data
    loader = DataLoader(opt)
    opt.vocab_size = loader.vocab_size
    opt.seq_length = loader.seq_length

    # Tensorboard summaries (they're great!)
    tb_summary_writer = tb and tb.SummaryWriter(opt.checkpoint_path)

    # Load pretrained model, info file, histories file
    infos = {}
    histories = {}
    if opt.start_from is not None:
        with open(os.path.join(opt.start_from, 'infos_'+opt.id+'.pkl'), 'rb') as f:
            infos = cPickle.load(f)
            saved_model_opt = infos['opt']
            need_be_same=["rnn_type", "rnn_size", "num_layers"]
            for checkme in need_be_same:
                assert vars(saved_model_opt)[checkme] == vars(opt)[checkme], "Command line argument and saved model disagree on '%s' " % checkme
        if os.path.isfile(os.path.join(opt.start_from, 'histories_'+opt.id+'.pkl')):
            with open(os.path.join(opt.start_from, 'histories_'+opt.id+'.pkl'), 'rb') as f:
                histories = cPickle.load(f)
    iteration = infos.get('iter', 0)
    epoch = infos.get('epoch', 0)
    val_result_history = histories.get('val_result_history', {})
    loss_history = histories.get('loss_history', {})
    lr_history = histories.get('lr_history', {})
    #ss_prob_history = histories.get('ss_prob_history', {})
    loader.iterators = infos.get('iterators', loader.iterators)
    loader.split_ix = infos.get('split_ix', loader.split_ix)
    if opt.load_best_score == 1:
        best_val_score = infos.get('best_val_score', None)

    # Create model
    model = models.setup(opt, reverse = False).cuda()

    pretrained_dict = torch.load(opt.model)
    model.load_state_dict(pretrained_dict, strict=False)
    back_model = models.setup(opt, reverse= True).cuda()
    d_pretrained_dict = torch.load('./log_xe/d_model.pth')
    back_model.load_state_dict(d_pretrained_dict, strict=False)
    dp_model = model
    dp_model.train()
    back_model.train()
    # Loss function
    crit = utils.LanguageModelCriterion()
    rl_crit = utils.RewardCriterion()
 
    # Optimizer and learning rate adjustment flag
     
    optimizer = utils.build_optimizer(chain(model.parameters(), back_model.parameters()), opt)

    #back_optimizer = utils.build_optimizer(back_model.parameters(), opt)
    update_lr_flag = True

    # Load the optimizer
    if vars(opt).get('start_from', None) is not None and os.path.isfile(os.path.join(opt.start_from,"optimizer.pth")):
        optimizer.load_state_dict(torch.load(os.path.join(opt.start_from, 'optimizer.pth')))

    # Training loop
    while True:

        # Update learning rate once per epoch
        if update_lr_flag:

            # Assign the learning rate
            if epoch > opt.learning_rate_decay_start and opt.learning_rate_decay_start >= 0:
                frac = (epoch - opt.learning_rate_decay_start) // opt.learning_rate_decay_every
                decay_factor = opt.learning_rate_decay_rate  ** frac
                opt.current_lr = opt.learning_rate * decay_factor
            else:
                opt.current_lr = opt.learning_rate
            utils.set_lr(optimizer, opt.current_lr)

            # Assign the scheduled sampling prob
            if epoch > opt.scheduled_sampling_start and opt.scheduled_sampling_start >= 0:
                frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every
                #opt.ss_prob = min(opt.scheduled_sampling_increase_prob  * frac, opt.scheduled_sampling_max_prob)
                #model.ss_prob = opt.ss_prob

            # If start self critical training
            if opt.self_critical_after != -1 and epoch >= opt.self_critical_after:
                sc_flag = True
                init_scorer(opt.cached_tokens)
            else:
                sc_flag = False

            update_lr_flag = False
                
        # Load data from train split (0)
        start = time.time()
        data = loader.get_batch('train')
        data_time = time.time() - start
        start = time.time()

        # Unpack data
        torch.cuda.synchronize()
        tmp = [data['fc_feats'], data['att_feats'], data['labels'], data['dist'], data['masks'], data['att_masks']]
        tmp = [_ if _ is None else torch.from_numpy(_).cuda() for _ in tmp]
        fc_feats, att_feats, labels, dist_label, masks, att_masks = tmp
        mid_data = data['labels']
        reverse_labels = np.reshape(mid_data, (-1, 6*30))
        reverse_labels = np.flip(reverse_labels, 1).copy()
        masks = masks.view((-1, 6*30))
       
        mid_mask = data['masks']
        reverse_masks = np.reshape(mid_mask, (-1, 6*30))
        reverse_masks = np.flip(reverse_masks, 1).copy()
        batchsize = fc_feats.size(0)
        # Forward pass and loss
        optimizer.zero_grad()
        #print (torch.sum(labels!=0), torch.sum(masks!=0))
        if 1:
          if 1:
            wordact, x_all = dp_model(fc_feats, att_feats, labels)
            wordact = F.log_softmax(wordact, 1)
            mask = masks[:,1:].contiguous()
            wordact = wordact[:,:,:-1]
            #wordact_t = wordact.permute(0, 2, 1).contiguous()
            #wordact_t = wordact_t.view(wordact_t.size(0) * wordact_t.size(1), -1)
            labels = labels.contiguous().view(-1, 6*30).cpu()
            wordclass_v = labels[:, 1:]
            #wordclass_t = wordclass_v.contiguous().view(\
            #   wordclass_v.size(0) * wordclass_v.size(1), 1)
            #maskids = torch.nonzero(mask.view(-1).cpu()).numpy().reshape(-1)
            #loss = F.cross_entropy(wordact_t[maskids, ...], \
            #   wordclass_t[maskids, ...].contiguous().view(maskids.shape[0])).cuda()
            loss = crit(wordact.transpose(2,1), wordclass_v, mask.cpu())
            
            #back_optimizer.zero_grad()
            labels_flip = torch.from_numpy(reverse_labels).cuda().view((-1, 6, 30))
            wordact, x_all_flip = back_model(fc_feats, att_feats, labels_flip)
            wordact = F.log_softmax(wordact, 1)
            reverse_masks = torch.from_numpy(reverse_masks).cuda()
            reverse_masks = reverse_masks[:, 1:].contiguous()
            wordact = wordact[:,:,:-1]
            #wordact_t = wordact.permute(0, 2, 1).contiguous()
            #wordact_t = wordact_t.view(wordact_t.size(0) * wordact_t.size(1), -1)
            labels_flip = labels_flip.contiguous().view(-1, 6*30).cpu()
            wordclass_v = labels_flip[:, 1:]
            #wordclass_t = wordclass_v.contiguous().view(\
            #   wordclass_v.size(0) * wordclass_v.size(1), 1)
            #maskids = torch.nonzero(reverse_masks.view(-1).cpu()).numpy().reshape(-1)
            #loss_flip = F.cross_entropy(wordact_t[maskids, ...], \
            #   wordclass_t[maskids, ...].contiguous().view(maskids.shape[0])).cuda()
            loss_flip = crit(wordact.transpose(2,1), wordclass_v, reverse_masks.cpu())

            
            
            #utils.clip_gradient(optimizer, opt.grad_clip)

            #optimizer.step()
            x_all = x_all[:,:,:-1]
            x_all_flip = x_all_flip[:,:,:-1]

            #x_all = x_all[:, :, :-1]
            #x_all_flip = x_all_flip[:, :,1:]

            idx = [i for i in range(x_all_flip.size()[2]-1, -1, -1)]
            idx = torch.LongTensor(idx[1:])
            idx = Variable(idx).cuda()
            invert_backstates = x_all_flip.index_select(2, idx)
            invert_backstates = invert_backstates.detach()
            l2_loss = ((x_all[:, :, :-1] - invert_backstates)** 2).mean()
            
            all_loss = loss.cuda() #+ loss_flip.cuda() + 3*l2_loss

            all_loss.backward()

            #l2_loss.backward()
            utils.clip_gradient(optimizer, opt.grad_clip)
            optimizer.step()
            
            train_loss = loss.item()
            torch.cuda.synchronize()
            total_time = time.time() - start
          if 1:
            if iteration % opt.print_freq == 1:
              print('Read data:', time.time() - start)
              if not sc_flag:
                print("iter {} (epoch {}), train_loss = {:.3f},loss_reg = {:.3f}, loss_flip = {:.3f}, data_time = {:.3f}, time/batch = {:.3f}" \
                    .format(iteration, epoch, train_loss, l2_loss, loss_flip, data_time, total_time))
              else:
                print("iter {} (epoch {}), avg_reward = {:.3f}, data_time = {:.3f}, time/batch = {:.3f}" \
                    .format(iteration, epoch, np.mean(reward[:,0]), data_time, total_time))

          # Update the iteration and epoch
          iteration += 1
          if data['bounds']['wrapped']:
            epoch += 1
            update_lr_flag = True

          # Write the training loss summary
          if (iteration % opt.losses_log_every == 0):
            add_summary_value(tb_summary_writer, 'train_loss', train_loss, iteration)
            add_summary_value(tb_summary_writer, 'learning_rate', opt.current_lr, iteration)
            #add_summary_value(tb_summary_writer, 'scheduled_sampling_prob', model.ss_prob, iteration)
            if sc_flag:
                add_summary_value(tb_summary_writer, 'avg_reward', np.mean(reward[:,0]), iteration)
            loss_history[iteration] = train_loss if not sc_flag else np.mean(reward[:,0])
            lr_history[iteration] = opt.current_lr
            #ss_prob_history[iteration] = model.ss_prob

        # Validate and save model 
          if (iteration % opt.save_checkpoint_every == 0):
            checkpoint_path = os.path.join(opt.checkpoint_path, 'model.pth')
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_path = os.path.join(opt.checkpoint_path, 'd_model.pth')
            torch.save(back_model.state_dict(), checkpoint_path)
            print("model saved to {}".format(checkpoint_path))
            optimizer_path = os.path.join(opt.checkpoint_path, 'optimizer.pth')
            torch.save(optimizer.state_dict(), optimizer_path)
            # Evaluate model
            eval_kwargs = {'split': 'test',
                            'dataset': opt.input_json}
            eval_kwargs.update(vars(opt))
            val_loss, predictions, lang_stats = eval_utils.eval_split(dp_model, crit, loader, eval_kwargs)
            # Write validation result into summary
            add_summary_value(tb_summary_writer, 'validation loss', val_loss, iteration)
            if lang_stats is not None:
                for k,v in lang_stats.items():
                    add_summary_value(tb_summary_writer, k, v, iteration)
            val_result_history[iteration] = {'loss': val_loss, 'lang_stats': lang_stats, 'predictions': predictions}

            # Our metric is CIDEr if available, otherwise validation loss
            if opt.language_eval == 1:
                current_score = lang_stats['CIDEr']
            else:
                current_score = - val_loss

            # Save model in checkpoint path 
            best_flag = False
            if best_val_score is None or current_score > best_val_score:
                best_val_score = current_score
                best_flag = True
            checkpoint_path = os.path.join(opt.checkpoint_path, 'model.pth')
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_path = os.path.join(opt.checkpoint_path, 'd_model.pth')
            torch.save(back_model.state_dict(), checkpoint_path)
            print("model saved to {}".format(checkpoint_path))
            optimizer_path = os.path.join(opt.checkpoint_path, 'optimizer.pth')
            torch.save(optimizer.state_dict(), optimizer_path)

            # Dump miscellaneous information
            infos['iter'] = iteration
            infos['epoch'] = epoch
            infos['iterators'] = loader.iterators
            infos['split_ix'] = loader.split_ix
            infos['best_val_score'] = best_val_score
            infos['opt'] = opt
            infos['vocab'] = loader.get_vocab()
            histories['val_result_history'] = val_result_history
            histories['loss_history'] = loss_history
            histories['lr_history'] = lr_history
            #histories['ss_prob_history'] = ss_prob_history
            with open(os.path.join(opt.checkpoint_path, 'infos_'+opt.id+'.pkl'), 'wb') as f:
                cPickle.dump(infos, f)
            with open(os.path.join(opt.checkpoint_path, 'histories_'+opt.id+'.pkl'), 'wb') as f:
                cPickle.dump(histories, f)

            # Save model to unique file if new best model
            if best_flag:
                model_fname = 'model-best-i{:05d}-score{:.4f}.pth'.format(iteration, best_val_score)
                infos_fname = 'model-best-i{:05d}-infos.pkl'.format(iteration)
                checkpoint_path = os.path.join(opt.checkpoint_path, model_fname)
                torch.save(model.state_dict(), checkpoint_path)
                checkpoint_path = os.path.join(opt.checkpoint_path, 'd_model-best.pth')
                torch.save(back_model.state_dict(), checkpoint_path)
                print("model saved to {}".format(checkpoint_path))
                with open(os.path.join(opt.checkpoint_path, infos_fname), 'wb') as f:
                    cPickle.dump(infos, f) 
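One detail from the training loop above worth isolating: the backward model's hidden states x_all_flip are re-reversed along the time dimension with index_select before the L2 term compares them with the forward states. Below is a standalone sketch of that reversal with made-up tensor sizes; on recent PyTorch versions torch.flip does the same thing more directly.

import torch

x = torch.arange(2 * 3 * 5, dtype=torch.float32).view(2, 3, 5)  # (batch, feat, time)

# Reverse the last (time) dimension with index_select, as in the training loop above.
idx = torch.arange(x.size(2) - 1, -1, -1)
reversed_by_index = x.index_select(2, idx)

# Equivalent, more direct form.
reversed_by_flip = torch.flip(x, dims=[2])
assert torch.equal(reversed_by_index, reversed_by_flip)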
Example #57
0
# THIS IMPLEMENTATION WILL INCLUDE A CONFUSION MATRIX
# The input can now handle images of different sizes
import numpy as np
from six.moves import cPickle as pickle
import time
import filters
import gabor_filter

with open('C:/data/train_data/data.pickle', 'rb') as f:
    tr_dat = pickle.load(f)
with open('C:/data/train_data/label.pickle', 'rb') as f:
    tr_lab = pickle.load(f)
with open('C:/data/test_data/data.pickle', 'rb') as f:
    te_dat = pickle.load(f)
with open('C:/data/test_data/label.pickle', 'rb') as f:
    te_lab = pickle.load(f)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sig_deri(x):
    return x * (1 - x)

# 1st layer initialization
layer1_ch = 2
CNN_layer1_map = np.ndarray(shape=(layer1_ch, 8, 8))
CNN_layer1_weight = (np.random.rand(layer1_ch, 3, 3) - 0.5) * 2.5
CNN_layer1_bias = np.random.rand(layer1_ch, 8, 8)
CNN_layer1_stride = 2

# 2nd layer initialization
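A note on the two helpers above: sig_deri is written in terms of the sigmoid's output, so it expects the already-activated value sigmoid(z) rather than the raw pre-activation z. A small standalone sketch of how the pair is typically used in a backward pass (variable names are illustrative):

import numpy as np


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def sig_deri(a):
    # a is assumed to already be a sigmoid output
    return a * (1 - a)


z = np.array([-2.0, 0.0, 2.0])  # pre-activations
a = sigmoid(z)                  # forward pass: activations
print(sig_deri(a))              # sigmoid'(z), computed from the activations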
Example #58
0
def pred_eval(
    config,
    predictor,
    test_data,
    imdb_test,
    vis=False,
    ignore_cache=None,
    logger=None,
    pairdb=None,
):
    """
    Wrapper for calculating offline validation for faster data analysis.
    In this example, all thresholds are set by hand.
    :param predictor: Predictor
    :param test_data: data iterator; must not be shuffled
    :param imdb_test: image database
    :param vis: controls visualization
    :param ignore_cache: ignore the saved cache file
    :param logger: the logger instance
    :return:
    """
    print(imdb_test.result_path)
    print("test iter size: ", config.TEST.test_iter)
    pose_err_file = os.path.join(
        imdb_test.result_path,
        imdb_test.name + "_pose_iter{}.pkl".format(config.TEST.test_iter),
    )
    if os.path.exists(pose_err_file) and not ignore_cache and not vis:
        with open(pose_err_file, "rb") as fid:
            if six.PY3:
                [
                    all_rot_err,
                    all_trans_err,
                    all_poses_est,
                    all_poses_gt,
                ] = cPickle.load(fid, encoding="latin1")
            else:
                [
                    all_rot_err,
                    all_trans_err,
                    all_poses_est,
                    all_poses_gt,
                ] = cPickle.load(fid)
        imdb_test.evaluate_pose(config, all_poses_est, all_poses_gt, logger)
        pose_add_plots_dir = os.path.join(imdb_test.result_path, "add_plots")
        mkdir_if_missing(pose_add_plots_dir)
        imdb_test.evaluate_pose_add(
            config,
            all_poses_est,
            all_poses_gt,
            output_dir=pose_add_plots_dir,
            logger=logger,
        )
        pose_arp2d_plots_dir = os.path.join(imdb_test.result_path,
                                            "arp_2d_plots")
        mkdir_if_missing(pose_arp2d_plots_dir)
        imdb_test.evaluate_pose_arp_2d(
            config,
            all_poses_est,
            all_poses_gt,
            output_dir=pose_arp2d_plots_dir,
            logger=logger,
        )
        return

    assert vis or not test_data.shuffle
    assert config.TEST.BATCH_PAIRS == 1
    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    num_pairs = len(pairdb)
    height = 480
    width = 640

    data_time, net_time, post_time = 0.0, 0.0, 0.0

    sum_EPE_all = 0.0
    num_inst_all = 0.0
    sum_EPE_viz = 0.0
    num_inst_viz = 0.0
    sum_EPE_vizbg = 0.0
    num_inst_vizbg = 0.0
    sum_PoseErr = [
        np.zeros((len(imdb_test.classes) + 1, 2))
        for batch_idx in range(config.TEST.test_iter)
    ]

    all_rot_err = [[[] for j in range(config.TEST.test_iter)]
                   for batch_idx in range(len(imdb_test.classes))
                   ]  # num_cls x test_iter
    all_trans_err = [[[] for j in range(config.TEST.test_iter)]
                     for batch_idx in range(len(imdb_test.classes))]

    all_poses_est = [[[] for j in range(config.TEST.test_iter)]
                     for batch_idx in range(len(imdb_test.classes))]
    all_poses_gt = [[[] for j in range(config.TEST.test_iter)]
                    for batch_idx in range(len(imdb_test.classes))]

    num_inst = np.zeros(len(imdb_test.classes) + 1)

    K = config.dataset.INTRINSIC_MATRIX
    if (config.TEST.test_iter > 1 or config.TEST.VISUALIZE) and True:
        print(
            "************* start setup render_glumpy environment... ******************"
        )
        if config.dataset.dataset.startswith("ModelNet"):
            from lib.render_glumpy.render_py_light_modelnet_multi import (
                Render_Py_Light_ModelNet_Multi, )

            modelnet_root = config.modelnet_root
            texture_path = os.path.join(modelnet_root, "gray_texture.png")

            model_path_list = [
                os.path.join(config.dataset.model_dir,
                             "{}.obj".format(model_name))
                for model_name in config.dataset.class_name
            ]
            render_machine = Render_Py_Light_ModelNet_Multi(
                model_path_list,
                texture_path,
                K,
                width,
                height,
                config.dataset.ZNEAR,
                config.dataset.ZFAR,
                brightness_ratios=[0.7],
            )
        else:
            render_machine = Render_Py(
                config.dataset.model_dir,
                config.dataset.class_name,
                K,
                width,
                height,
                config.dataset.ZNEAR,
                config.dataset.ZFAR,
            )

        def render(render_machine, pose, cls_idx, K=None):
            if config.dataset.dataset.startswith("ModelNet"):
                idx = 2
                # generate random light_position
                if idx % 6 == 0:
                    light_position = [1, 0, 1]
                elif idx % 6 == 1:
                    light_position = [1, 1, 1]
                elif idx % 6 == 2:
                    light_position = [0, 1, 1]
                elif idx % 6 == 3:
                    light_position = [-1, 1, 1]
                elif idx % 6 == 4:
                    light_position = [-1, 0, 1]
                elif idx % 6 == 5:
                    light_position = [0, 0, 1]
                else:
                    raise Exception("???")
                light_position = np.array(light_position) * 0.5
                # inverse yz
                light_position[0] += pose[0, 3]
                light_position[1] -= pose[1, 3]
                light_position[2] -= pose[2, 3]

                colors = np.array([1, 1, 1])  # white light
                intensity = np.random.uniform(0.9, 1.1, size=(3, ))
                colors_randk = 0
                light_intensity = colors[colors_randk] * intensity

                # randomly choose a render machine
                rm_randk = 0  # random.randint(0, len(brightness_ratios) - 1)
                rgb_gl, depth_gl = render_machine.render(
                    cls_idx,
                    pose[:3, :3],
                    pose[:3, 3],
                    light_position,
                    light_intensity,
                    brightness_k=rm_randk,
                    r_type="mat",
                )
                rgb_gl = rgb_gl.astype("uint8")
            else:
                rgb_gl, depth_gl = render_machine.render(cls_idx,
                                                         pose[:3, :3],
                                                         pose[:, 3],
                                                         r_type="mat",
                                                         K=K)
                rgb_gl = rgb_gl.astype("uint8")
            return rgb_gl, depth_gl

        print(
            "***************setup render_glumpy environment succeed ******************"
        )

    if config.TEST.PRECOMPUTED_ICP:
        print("precomputed_ICP")
        config.TEST.test_iter = 1
        all_rot_err = [[[] for j in range(1)]
                       for batch_idx in range(len(imdb_test.classes))]
        all_trans_err = [[[] for j in range(1)]
                         for batch_idx in range(len(imdb_test.classes))]

        all_poses_est = [[[] for j in range(1)]
                         for batch_idx in range(len(imdb_test.classes))]
        all_poses_gt = [[[] for j in range(1)]
                        for batch_idx in range(len(imdb_test.classes))]

        xy_trans_err = [[[] for j in range(1)]
                        for batch_idx in range(len(imdb_test.classes))]
        z_trans_err = [[[] for j in range(1)]
                       for batch_idx in range(len(imdb_test.classes))]
        for idx in range(len(pairdb)):
            pose_path = pairdb[idx]["depth_rendered"][:-10] + "-pose_icp.txt"
            pose_rendered_update = np.loadtxt(pose_path, skiprows=1)
            pose_observed = pairdb[idx]["pose_observed"]
            r_dist_est, t_dist_est = calc_rt_dist_m(pose_rendered_update,
                                                    pose_observed)
            xy_dist = np.linalg.norm(pose_rendered_update[:2, -1] -
                                     pose_observed[:2, -1])
            z_dist = np.linalg.norm(pose_rendered_update[-1, -1] -
                                    pose_observed[-1, -1])
            print(
                "{}: r_dist_est: {}, t_dist_est: {}, xy_dist: {}, z_dist: {}".
                format(idx, r_dist_est, t_dist_est, xy_dist, z_dist))
            class_id = imdb_test.classes.index(pairdb[idx]["gt_class"])
            # store poses estimation and gt
            all_poses_est[class_id][0].append(pose_rendered_update)
            all_poses_gt[class_id][0].append(pairdb[idx]["pose_observed"])
            all_rot_err[class_id][0].append(r_dist_est)
            all_trans_err[class_id][0].append(t_dist_est)
            xy_trans_err[class_id][0].append(xy_dist)
            z_trans_err[class_id][0].append(z_dist)
        all_rot_err = np.array(all_rot_err)
        all_trans_err = np.array(all_trans_err)
        print("rot = {} +/- {}".format(np.mean(all_rot_err[class_id][0]),
                                       np.std(all_rot_err[class_id][0])))
        print("trans = {} +/- {}".format(np.mean(all_trans_err[class_id][0]),
                                         np.std(all_trans_err[class_id][0])))
        num_list = all_trans_err[class_id][0]
        print("xyz: {:.2f} +/- {:.2f}".format(
            np.mean(num_list) * 100,
            np.std(num_list) * 100))
        num_list = xy_trans_err[class_id][0]
        print("xy: {:.2f} +/- {:.2f}".format(
            np.mean(num_list) * 100,
            np.std(num_list) * 100))
        num_list = z_trans_err[class_id][0]
        print("z: {:.2f} +/- {:.2f}".format(
            np.mean(num_list) * 100,
            np.std(num_list) * 100))

        imdb_test.evaluate_pose(config, all_poses_est, all_poses_gt, logger)
        pose_add_plots_dir = os.path.join(imdb_test.result_path,
                                          "add_plots_precomputed_ICP")
        mkdir_if_missing(pose_add_plots_dir)
        imdb_test.evaluate_pose_add(
            config,
            all_poses_est,
            all_poses_gt,
            output_dir=pose_add_plots_dir,
            logger=logger,
        )
        pose_arp2d_plots_dir = os.path.join(imdb_test.result_path,
                                            "arp_2d_plots_precomputed_ICP")
        mkdir_if_missing(pose_arp2d_plots_dir)
        imdb_test.evaluate_pose_arp_2d(
            config,
            all_poses_est,
            all_poses_gt,
            output_dir=pose_arp2d_plots_dir,
            logger=logger,
        )
        return

    if config.TEST.BEFORE_ICP:
        print("before_ICP")
        config.TEST.test_iter = 1
        all_rot_err = [[[] for j in range(1)]
                       for batch_idx in range(len(imdb_test.classes))]
        all_trans_err = [[[] for j in range(1)]
                         for batch_idx in range(len(imdb_test.classes))]

        all_poses_est = [[[] for j in range(1)]
                         for batch_idx in range(len(imdb_test.classes))]
        all_poses_gt = [[[] for j in range(1)]
                        for batch_idx in range(len(imdb_test.classes))]

        xy_trans_err = [[[] for j in range(1)]
                        for batch_idx in range(len(imdb_test.classes))]
        z_trans_err = [[[] for j in range(1)]
                       for batch_idx in range(len(imdb_test.classes))]
        for idx in range(len(pairdb)):
            pose_path = pairdb[idx]["depth_rendered"][:-10] + "-pose.txt"
            pose_rendered_update = np.loadtxt(pose_path, skiprows=1)
            pose_observed = pairdb[idx]["pose_observed"]
            r_dist_est, t_dist_est = calc_rt_dist_m(pose_rendered_update,
                                                    pose_observed)
            xy_dist = np.linalg.norm(pose_rendered_update[:2, -1] -
                                     pose_observed[:2, -1])
            z_dist = np.linalg.norm(pose_rendered_update[-1, -1] -
                                    pose_observed[-1, -1])
            class_id = imdb_test.classes.index(pairdb[idx]["gt_class"])
            # store poses estimation and gt
            all_poses_est[class_id][0].append(pose_rendered_update)
            all_poses_gt[class_id][0].append(pairdb[idx]["pose_observed"])
            all_rot_err[class_id][0].append(r_dist_est)
            all_trans_err[class_id][0].append(t_dist_est)
            xy_trans_err[class_id][0].append(xy_dist)
            z_trans_err[class_id][0].append(z_dist)

        all_trans_err = np.array(all_trans_err)
        imdb_test.evaluate_pose(config, all_poses_est, all_poses_gt, logger)
        pose_add_plots_dir = os.path.join(imdb_test.result_path,
                                          "add_plots_before_ICP")
        mkdir_if_missing(pose_add_plots_dir)
        imdb_test.evaluate_pose_add(
            config,
            all_poses_est,
            all_poses_gt,
            output_dir=pose_add_plots_dir,
            logger=logger,
        )
        pose_arp2d_plots_dir = os.path.join(imdb_test.result_path,
                                            "arp_2d_plots_before_ICP")
        mkdir_if_missing(pose_arp2d_plots_dir)
        imdb_test.evaluate_pose_arp_2d(
            config,
            all_poses_est,
            all_poses_gt,
            output_dir=pose_arp2d_plots_dir,
            logger=logger,
        )
        return

    # ------------------------------------------------------------------------------
    t_start = time.time()
    t = time.time()
    for idx, data_batch in enumerate(test_data):
        if np.sum(pairdb[idx]
                  ["pose_rendered"]) == -12:  # NO POINT VALID IN INIT POSE
            print(idx)
            class_id = imdb_test.classes.index(pairdb[idx]["gt_class"])
            for pose_iter_idx in range(config.TEST.test_iter):
                all_poses_est[class_id][pose_iter_idx].append(
                    pairdb[idx]["pose_rendered"])
                all_poses_gt[class_id][pose_iter_idx].append(
                    pairdb[idx]["pose_observed"])

                r_dist = 1000
                t_dist = 1000
                all_rot_err[class_id][pose_iter_idx].append(r_dist)
                all_trans_err[class_id][pose_iter_idx].append(t_dist)
                sum_PoseErr[pose_iter_idx][class_id, :] += np.array(
                    [r_dist, t_dist])
                sum_PoseErr[pose_iter_idx][-1, :] += np.array([r_dist, t_dist])
                # post process
            if idx % 50 == 0:
                print_and_log(
                    "testing {}/{} data {:.4f}s net {:.4f}s calc_gt {:.4f}s".
                    format(
                        (idx + 1),
                        num_pairs,
                        data_time / ((idx + 1) * test_data.batch_size),
                        net_time / ((idx + 1) * test_data.batch_size),
                        post_time / ((idx + 1) * test_data.batch_size),
                    ),
                    logger,
                )
            print("in test: NO POINT_VALID IN rendered")
            continue
        data_time += time.time() - t

        t = time.time()

        pose_rendered = pairdb[idx]["pose_rendered"]
        if np.sum(pose_rendered) == -12:
            print(idx)
            class_id = imdb_test.classes.index(pairdb[idx]["gt_class"])
            num_inst[class_id] += 1
            num_inst[-1] += 1
            for pose_iter_idx in range(config.TEST.test_iter):
                all_poses_est[class_id][pose_iter_idx].append(pose_rendered)
                all_poses_gt[class_id][pose_iter_idx].append(
                    pairdb[idx]["pose_observed"])

            # post process
            if idx % 50 == 0:
                print_and_log(
                    "testing {}/{} data {:.4f}s net {:.4f}s calc_gt {:.4f}s".
                    format(
                        (idx + 1),
                        num_pairs,
                        data_time / ((idx + 1) * test_data.batch_size),
                        net_time / ((idx + 1) * test_data.batch_size),
                        post_time / ((idx + 1) * test_data.batch_size),
                    ),
                    logger,
                )

            t = time.time()
            continue

        output_all = predictor.predict(data_batch)
        net_time += time.time() - t

        t = time.time()
        rst_iter = []
        for output in output_all:
            cur_rst = {}
            cur_rst["se3"] = np.squeeze(
                output["se3_output"].asnumpy()).astype("float32")

            if not config.TEST.FAST_TEST and config.network.PRED_FLOW:
                cur_rst["flow"] = np.squeeze(
                    output["flow_est_crop_output"].asnumpy().transpose(
                        (2, 3, 1, 0))).astype("float16")
            else:
                cur_rst["flow"] = None
            if config.network.PRED_MASK and config.TEST.UPDATE_MASK not in [
                    "init", "box_rendered"
            ]:
                mask_pred = np.squeeze(
                    output["mask_observed_pred_output"].asnumpy()).astype(
                        "float32")
                cur_rst["mask_pred"] = mask_pred

            rst_iter.append(cur_rst)

        post_time += time.time() - t
        # sample_ratio = 1  # 0.01
        for batch_idx in range(0, test_data.batch_size):
            # if config.TEST.VISUALIZE and not (r_dist>15 and t_dist>0.05):
            #     continue # 3388, 5326
            # calculate the flow error --------------------------------------------
            t = time.time()
            if config.network.PRED_FLOW and not config.TEST.FAST_TEST:
                # evaluate optical flow
                flow_gt = par_generate_gt(config, pairdb[idx])
                if config.network.PRED_FLOW:
                    all_diff = calc_EPE_one_pair(rst_iter[batch_idx], flow_gt,
                                                 "flow")
                sum_EPE_all += all_diff["epe_all"]
                num_inst_all += all_diff["num_all"]
                sum_EPE_viz += all_diff["epe_viz"]
                num_inst_viz += all_diff["num_viz"]
                sum_EPE_vizbg += all_diff["epe_vizbg"]
                num_inst_vizbg += all_diff["num_vizbg"]

            # calculate the se3 error ---------------------------------------------
            # evaluate se3 estimation
            pose_rendered = pairdb[idx]["pose_rendered"]
            class_id = imdb_test.classes.index(pairdb[idx]["gt_class"])
            num_inst[class_id] += 1
            num_inst[-1] += 1
            post_time += time.time() - t

            # iterative refine se3 estimation --------------------------------------------------
            for pose_iter_idx in range(config.TEST.test_iter):
                t = time.time()
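                # compose the predicted relative rotation and (de-normalized)
                # translation with the current rendered pose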
                pose_rendered_update = RT_transform(
                    pose_rendered,
                    rst_iter[0]["se3"][:-3],
                    rst_iter[0]["se3"][-3:],
                    config.dataset.trans_means,
                    config.dataset.trans_stds,
                    config.network.ROT_COORD,
                )

                # calculate error
                r_dist, t_dist = calc_rt_dist_m(pose_rendered_update,
                                                pairdb[idx]["pose_observed"])

                # store poses estimation and gt
                all_poses_est[class_id][pose_iter_idx].append(
                    pose_rendered_update)
                all_poses_gt[class_id][pose_iter_idx].append(
                    pairdb[idx]["pose_observed"])

                all_rot_err[class_id][pose_iter_idx].append(r_dist)
                all_trans_err[class_id][pose_iter_idx].append(t_dist)
                sum_PoseErr[pose_iter_idx][class_id, :] += np.array(
                    [r_dist, t_dist])
                sum_PoseErr[pose_iter_idx][-1, :] += np.array([r_dist, t_dist])
                if config.TEST.VISUALIZE:
                    print("idx {}, iter {}: rError: {}, tError: {}".format(
                        idx + batch_idx, pose_iter_idx + 1, r_dist, t_dist))

                post_time += time.time() - t

                # if more iterations remain (or when visualizing), re-render with the updated pose
                if pose_iter_idx < (config.TEST.test_iter -
                                    1) or config.TEST.VISUALIZE:
                    t = time.time()
                    # get refined image
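                    # per-image camera intrinsics; if the "-K.txt" file is
                    # missing, K keeps whatever value it had before (and is
                    # undefined if it was never loaded)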
                    K_path = pairdb[idx]["image_observed"][:-10] + "-K.txt"
                    if os.path.exists(K_path):
                        K = np.loadtxt(K_path)
                    image_refined, depth_refined = render(
                        render_machine,
                        pose_rendered_update,
                        config.dataset.class_name.index(
                            pairdb[idx]["gt_class"]),
                        K=K,
                    )
                    image_refined = image_refined[:, :, :3]

                    # update minibatch
                    update_package = [{
                        "image_rendered": image_refined,
                        "src_pose": pose_rendered_update
                    }]
                    if config.network.INPUT_DEPTH:
                        update_package[0]["depth_rendered"] = depth_refined
                    if config.network.INPUT_MASK:
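                        # binarize the refined depth into a rendered mask
                        # (pixels with depth above the small threshold are
                        # treated as foreground)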
                        mask_rendered_refined = np.zeros(depth_refined.shape)
                        mask_rendered_refined[depth_refined > 0.2] = 1
                        update_package[0][
                            "mask_rendered"] = mask_rendered_refined
                        if config.network.PRED_MASK:
                            # init, box_rendered, mask_rendered, box_observed, mask_observed
                            if config.TEST.UPDATE_MASK == "box_rendered":
                                input_names = [
                                    blob_name[0]
                                    for blob_name in data_batch.provide_data[0]
                                ]
                                update_package[0][
                                    "mask_observed"] = np.squeeze(
                                        data_batch.data[0][input_names.index(
                                            "mask_rendered")].asnumpy()
                                        [batch_idx])  # noqa
                            elif config.TEST.UPDATE_MASK == "init":
                                pass
                            else:
                                raise Exception(
                                    "Unknown UPDATE_MASK type: {}".format(
                                        config.TEST.UPDATE_MASK))

                    pose_rendered = pose_rendered_update
                    data_batch = update_data_batch(config, data_batch,
                                                   update_package)

                    data_time += time.time() - t

                    # forward and get rst
                    if pose_iter_idx < config.TEST.test_iter - 1:
                        t = time.time()
                        output_all = predictor.predict(data_batch)
                        net_time += time.time() - t

                        t = time.time()
                        rst_iter = []
                        for output in output_all:
                            cur_rst = {}
                            if config.network.REGRESSOR_NUM == 1:
                                cur_rst["se3"] = np.squeeze(
                                    output["se3_output"].asnumpy()).astype(
                                        "float32")

                            if not config.TEST.FAST_TEST and config.network.PRED_FLOW:
                                cur_rst["flow"] = np.squeeze(
                                    output["flow_est_crop_output"].asnumpy().
                                    transpose((2, 3, 1, 0))).astype("float16")
                            else:
                                cur_rst["flow"] = None

                            if (config.network.PRED_MASK
                                    and config.TEST.UPDATE_MASK
                                    not in ["init", "box_rendered"]):
                                mask_pred = np.squeeze(
                                    output["mask_observed_pred_output"].
                                    asnumpy()).astype("float32")
                                cur_rst["mask_pred"] = mask_pred

                            rst_iter.append(cur_rst)

                        post_time += time.time() - t

        # post process
        if idx % 50 == 0:
            print_and_log(
                "testing {}/{} data {:.4f}s net {:.4f}s calc_gt {:.4f}s".
                format(
                    (idx + 1),
                    num_pairs,
                    data_time / ((idx + 1) * test_data.batch_size),
                    net_time / ((idx + 1) * test_data.batch_size),
                    post_time / ((idx + 1) * test_data.batch_size),
                ),
                logger,
            )

        t = time.time()

    all_rot_err = np.array(all_rot_err)
    all_trans_err = np.array(all_trans_err)

    # save inference results
    if not config.TEST.VISUALIZE:
        with open(pose_err_file, "wb") as f:
            print("saving result cache to {}".format(pose_err_file))
            cPickle.dump(
                [all_rot_err, all_trans_err, all_poses_est, all_poses_gt],
                f,
                protocol=2)
            print("done")

    if config.network.PRED_FLOW:
        print_and_log("evaluate flow:", logger)
        print_and_log(
            "EPE all: {}".format(sum_EPE_all / max(num_inst_all, 1.0)), logger)
        print_and_log(
            "EPE ignore unvisible: {}".format(
                sum_EPE_vizbg / max(num_inst_vizbg, 1.0)), logger)
        print_and_log(
            "EPE visible: {}".format(sum_EPE_viz / max(num_inst_viz, 1.0)),
            logger)

    print_and_log("evaluate pose:", logger)
    imdb_test.evaluate_pose(config, all_poses_est, all_poses_gt, logger)
    # evaluate pose add
    pose_add_plots_dir = os.path.join(imdb_test.result_path, "add_plots")
    mkdir_if_missing(pose_add_plots_dir)
    imdb_test.evaluate_pose_add(config,
                                all_poses_est,
                                all_poses_gt,
                                output_dir=pose_add_plots_dir,
                                logger=logger)
    pose_arp2d_plots_dir = os.path.join(imdb_test.result_path, "arp_2d_plots")
    mkdir_if_missing(pose_arp2d_plots_dir)
    imdb_test.evaluate_pose_arp_2d(config,
                                   all_poses_est,
                                   all_poses_gt,
                                   output_dir=pose_arp2d_plots_dir,
                                   logger=logger)

    print_and_log("using {} seconds in total".format(time.time() - t_start),
                  logger)
예제 #59
0
    "title": "BJ’s Restaurant & Brewhouse",
    "location": "Jacksonville",
    "employees": "Employees are " + employee_sentiment + "." +
    "The company then bought 26.",
    "customers": "Customers are happy. The company then bought 26.",
    "shareholders": "Shareholders are happy. The company then bought 26.",
    "management": "Management is performing well. The company then bought 26."
}
#
colors = {'background': '#111111', 'text': '#7FDBFF'}

my_path = os.path.abspath(os.path.dirname(__file__))

path = os.path.join(my_path, "data/cpickle/")

first_dict = pickle.load(open(path + "first_page.p", "rb"))

figure = pf.figs_polar(first_dict["code_start"], "bench",
                       first_dict["code_start"])

comp_plot_output = figure

fig = figs(first_dict["code_start"], first_dict["the_benchmark"])
stock_plot_output = fig

df_perf_summary = first_dict["df_perf_summary_output"]


def make_dash_table(df):
    ''' Return a dash definition of an HTML table for a Pandas dataframe '''
    table = []
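    # the original snippet is truncated here; a minimal completion sketch,
    # assuming dash_html_components is imported as `html` elsewhere in the file:
    for _, row in df.iterrows():
        table.append(html.Tr([html.Td([cell]) for cell in row]))
    return table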
예제 #60
0
def load_dataset(filename):
    """ Load the dataset from the filename. We assume it is a pickled Bunch """
    with open(filename, "rb") as fid:
        data = cPickle.load(fid)
    return data
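
# minimal usage sketch; "dataset_bunch.pkl" is a hypothetical file name, not
# part of the original snippet
example_bunch = load_dataset("dataset_bunch.pkl")
print(type(example_bunch))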