Beispiel #1
0
def main():
    """Interactively decode reviews from batches of the Amazon test set.

    Creates a NervanaGPU backend (``device_id=1``), installs it as
    ``NervanaObject.be``, wraps the gzipped test reviews in a
    ``DiskDataIterator`` and, for every batch, asks for a review index and
    prints that review decoded from its one-hot encoding.

    Raises:
        ValueError: if the text entered at the prompt is not an integer.
    """
    # Disabled batch-writer step, kept for reference:
    # azbw = AmazonBatchWriter(
    #     '/root/data/pcallier/amazon/reviews_Health_and_Personal_Care.json.gz',
    #     '/root/data/pcallier/amazon/temp.hd5')
    # azbw.run()

    # Local import: the backend module is only imported when main() runs.
    from neon.backends.nervanagpu import NervanaGPU

    # Named once so the backend, the iterator and the reshape below cannot
    # drift apart.
    batch_size = 128
    nvocab = 67

    # Python 2's input() evaluates whatever is typed as an expression;
    # read the raw line instead and convert it explicitly.
    try:
        read_line = raw_input
    except NameError:  # Python 3: input() already returns the raw string
        read_line = input

    ng = NervanaGPU(0, device_id=1)
    NervanaObject.be = ng
    ng.bsz = batch_size

    # NOTE(review): the third positional argument is presumably the batch
    # size (it matched ng.bsz in the original) -- confirm against
    # DiskDataIterator's signature.
    train_set = DiskDataIterator(
        lambda: batcher(load_data('/root/data/amazon/test_amazon.json.gz')),
        3000,
        batch_size,
        nvocab=nvocab)

    # Let the user pull an arbitrary example out of each batch.
    for bidx, (X_batch, y_batch) in enumerate(train_set):
        # Single-argument print(...) parses identically on Python 2 and 3.
        print("Batch {}:".format(bidx))
        # Debug aid: print(X_batch.get().T.sum(axis=1))
        reviewnum = int(read_line("Pick review index to fetch and decode: "))
        # One row per example after the transpose; reshape it back to
        # (nvocab, length) before decoding.
        one_hot = X_batch.get().T[reviewnum].reshape(nvocab, -1)
        review = from_one_hot(one_hot)
        # The decoded characters are printed in reverse order, as in the
        # original -- presumably the batcher stores reviews reversed.
        print(''.join(review)[::-1])
Beispiel #2
0
def main():
    """Fetch batches of Amazon test reviews and decode one picked by the user.

    Sets up a NervanaGPU backend (``device_id=1``) as ``NervanaObject.be``,
    iterates a ``DiskDataIterator`` built over the gzipped test reviews and,
    per batch, prompts for a review index and prints the review recovered
    from its one-hot representation.

    Raises:
        ValueError: if the value typed at the prompt is not an integer.
    """
    # Disabled batch-writer step, kept for reference:
    # azbw = AmazonBatchWriter(
    #     '/root/data/pcallier/amazon/reviews_Health_and_Personal_Care.json.gz',
    #     '/root/data/pcallier/amazon/temp.hd5')
    # azbw.run()

    # Local import: the backend module is only imported when main() runs.
    from neon.backends.nervanagpu import NervanaGPU

    # Shared by the backend, the iterator and the reshape further down.
    batch_size = 128
    nvocab = 67

    # Python 2's input() is eval(raw_input()); never evaluate what the user
    # types -- take the raw line and convert it with int().
    try:
        read_line = raw_input
    except NameError:  # Python 3: input() already returns the raw string
        read_line = input

    ng = NervanaGPU(0, device_id=1)
    NervanaObject.be = ng
    ng.bsz = batch_size

    # NOTE(review): the third positional argument is presumably the batch
    # size (it matched ng.bsz in the original) -- confirm against
    # DiskDataIterator's signature.
    train_set = DiskDataIterator(
        lambda: batcher(load_data('/root/data/amazon/test_amazon.json.gz')),
        3000,
        batch_size,
        nvocab=nvocab)

    # Random examples from each batch, chosen interactively.
    for bidx, (X_batch, y_batch) in enumerate(train_set):
        # Single-argument print(...) parses identically on Python 2 and 3.
        print("Batch {}:".format(bidx))
        # Debug aid: print(X_batch.get().T.sum(axis=1))
        reviewnum = int(read_line("Pick review index to fetch and decode: "))
        # After the transpose each row is one example; restore the
        # (nvocab, length) layout before decoding.
        one_hot = X_batch.get().T[reviewnum].reshape(nvocab, -1)
        review = from_one_hot(one_hot)
        # Output is reversed exactly as in the original -- presumably the
        # batcher stores reviews back to front.
        print(''.join(review)[::-1])
Beispiel #3
0
        np.set_printoptions(threshold=np.nan)
        print "Batch properties:"
        print "Shape (data): {}".format(data.shape)
        print "Shape (label): {}".format(label.shape)
        print "Type: {}".format(type(data))
        print
        print "First record of first batch:"
        print "Type (1 level in): {}".format(type(data[0]))
        print "Type of record (2 levels in): {}".format(type(data[0,0]))
        print data[0,0]
        print "Sentiment label: {}".format(label[0,0])
        print "Data in numpy format:"
        oh = data_utils.to_one_hot(data[0,0])
        print np.array_str(np.argmax(oh,axis=0))
        print "Translated back into characters:\n"
        print ''.join(data_utils.from_one_hot(oh))

        # demo balanced batching
        amz_balanced_batcher = batch_data(amz_train,balance_labels=True)
        balanced_batch = amz_balanced_batcher.next()
        print 'Balanced batch:'
        balanced_label_counts = {}
        for idx in range(balanced_batch[1].shape[0]):
            label = balanced_batch[1][idx,0]
            balanced_label_counts[label] = balanced_label_counts.get(label, 0) + 1
        print balanced_label_counts

    # Demo iterator utility classes
    # iterate multiple times over same data 
    if args.iterator_demo:
        # Demo dataIterator class
        transformer_fun=None)
    am_test_batch = batch_data.batch_data(amte,
        normalizer_fun=None,transformer_fun=None)
    
    # Spit out some sample data
    next_batch = am_train_batch.next()
    data, label = next_batch
    np.set_printoptions(threshold=np.nan)
    print "Batch properties:"
    print "Length: {}".format(len(data))
    print "Type: {}".format(type(data))
    print
    print "First record of first batch:"
    print "Type (1 level in): {}".format(type(data[0]))
    print "Type of record (2 levels in): {}".format(type(data[0,0]))
    print data[0,0]
    print "Sentiment label: {}".format(label[0])
    print "In numpy format:"
    oh = data_utils.to_one_hot(data[0,0])
    print np.array_str(np.argmax(oh,axis=0))
    print "Translated back into characters:\n"
    print data_utils.from_one_hot(oh)
    
    # dimension checks
    second_batch_data, second_batch_label = second_batch = am_train_batch.next()
    second_batch = list(second_batch)
    print len(second_batch)
    print "Data object type: ", type(second_batch_data)
    print second_batch_data.shape