Example #1
# assumes pandas as pd, numpy as np, cv2, os, ZipFile (zipfile), tqdm_utils and
# the helper decode_image_from_raw_bytes are available in the enclosing module
def load_lfw_dataset_EW(use_raw=False, dimx=45, dimy=45):

    # read attributes; the header row is shifted one column relative to the
    # data, so realign the names and drop the resulting trailing column
    df_attrs = pd.read_csv(ATTRS_NAME, sep='\t', skiprows=1)
    df_attrs.columns = list(df_attrs.columns)[1:] + ["NaN"]
    df_attrs = df_attrs.drop("NaN", axis=1)
    imgs_with_attrs = set(map(tuple, df_attrs[["person", "imagenum"]].values))

    # read photos
    photos = []
    mapping = []
    index = 0

    with ZipFile(IMAGES_NAME_POC) as f:
        for m in tqdm_utils.tqdm_notebook_failsafe(f.namelist()):
            # prepare image
            img = decode_image_from_raw_bytes(f.open(m).read())
            img = cv2.resize(img, (dimx, dimy))
            # parse person
            fname = os.path.split(m)[-1]
            fname_splitted = fname[:-4].replace('_', ' ').split()
            if fname_splitted[-1][-1].isdigit():
                photo_number = int(fname_splitted[-1])
                photos.append(img)
                mapping.append([index, '_'.join(fname_splitted)])
                index += 1
    photos = np.stack(photos).astype('uint8')

    return photos, mapping
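
A minimal, hypothetical usage sketch (ATTRS_NAME and IMAGES_NAME_POC are assumed to be module-level constants pointing at the LFW attributes file and image zip):

# hypothetical usage -- the constants above are assumptions, not part of the snippet
photos, mapping = load_lfw_dataset_EW(dimx=45, dimy=45)
print(photos.shape)   # (n_images, 45, 45, 3), dtype uint8
print(mapping[:2])    # e.g. [[0, 'Aaron_Eckhart_0001'], [1, ...]]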
Example #2
# method of a Keras Callback subclass; assumes `from collections import defaultdict`
def on_epoch_begin(self, epoch, logs=None):
    print('\nEpoch %d/%d' % (epoch + 1, self.epochs))
    # use the step count as the progress-bar target when available,
    # otherwise fall back to the sample count
    if "steps" in self.params:
        self.use_steps = True
        self.target = self.params['steps']
    else:
        self.use_steps = False
        self.target = self.params['samples']
    self.prog_bar = tqdm_utils.tqdm_notebook_failsafe(total=self.target)
    self.log_values_by_metric = defaultdict(list)
Example #3
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    # yields full batches only; a trailing partial batch is dropped
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.random.permutation(len(inputs))
    for start_idx in tqdm_utils.tqdm_notebook_failsafe(range(0, len(inputs) - batchsize + 1, batchsize)):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]
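
A hypothetical driver loop (X_train, y_train and the batch size are placeholders):

# hypothetical usage of iterate_minibatches
for x_batch, y_batch in iterate_minibatches(X_train, y_train, batchsize=64, shuffle=True):
    ...  # e.g. run one optimization step on (x_batch, y_batch)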
Example #4
def test_validation_loss(decoder, s, generate_batch, val_img_embeds,
                         val_captions_indexed):
    np.random.seed(300)
    random.seed(300)
    val_loss = 0
    batches_for_eval = 1000
    for _ in tqdm_utils.tqdm_notebook_failsafe(range(batches_for_eval)):
        val_loss += s.run(
            decoder.loss,
            generate_batch(val_img_embeds, val_captions_indexed, 32, 20))
    val_loss /= batches_for_eval
    return val_loss
Example #5
def download_file(url, file_path):
    r = requests.get(url, stream=True)
    total_size = int(r.headers.get('content-length'))
    try:
        with open(file_path, 'wb', buffering=16 * 1024 * 1024) as f:
            bar = tqdm_utils.tqdm_notebook_failsafe(total=total_size,
                                                    unit='B',
                                                    unit_scale=True)
            bar.set_description(os.path.split(file_path)[-1])
            for chunk in r.iter_content(32 * 1024):
                f.write(chunk)
                bar.update(len(chunk))
            bar.close()
    except Exception as e:
        print("Download failed:", e)
    finally:
        # guard against the file never having been created
        if os.path.exists(file_path) and os.path.getsize(file_path) != total_size:
            os.remove(file_path)
            print("Removed incomplete download")
Example #6
def download_file(url, file_path):
    r = requests.get(url, stream=True)
    total_size = int(r.headers.get('content-length'))
    bar = tqdm_utils.tqdm_notebook_failsafe(total=total_size, unit='B', unit_scale=True)
    bar.set_description(os.path.split(file_path)[-1])
    incomplete_download = False
    try:
        with open(file_path, 'wb', buffering=16 * 1024 * 1024) as f:
            for chunk in r.iter_content(4 * 1024 * 1024):
                f.write(chunk)
                bar.update(len(chunk))
    finally:
        bar.close()
        if os.path.exists(file_path) and os.path.getsize(file_path) != total_size:
            incomplete_download = True
            os.remove(file_path)
    if incomplete_download:
        raise Exception("Incomplete download")
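
A hypothetical call site (the URL and destination path are placeholders):

# hypothetical usage of download_file
download_file("https://example.com/data/archive.zip", "archive.zip")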
Example #7
def load_lfw_dataset(use_raw=False, dx=80, dy=80, dimx=45, dimy=45):

    # read attrs
    df_attrs = pd.read_csv(ATTRS_NAME, sep='\t', skiprows=1)
    # the header row is shifted one column relative to the data,
    # so realign the names and drop the trailing data column
    df_attrs = pd.DataFrame(df_attrs.iloc[:, :-1].values,
                            columns=df_attrs.columns[1:])
    imgs_with_attrs = set(map(tuple, df_attrs[["person", "imagenum"]].values))

    # read photos
    all_photos = []
    photo_ids = []

    with tarfile.open(RAW_IMAGES_NAME if use_raw else IMAGES_NAME) as f:
        for m in tqdm_utils.tqdm_notebook_failsafe(f.getmembers()):
            if m.isfile() and m.name.endswith(".jpg"):
                # prepare image: crop the borders, then resize
                img = decode_image_from_raw_bytes(f.extractfile(m).read())
                img = img[dy:-dy, dx:-dx]  # crop dy/dx pixels from each edge
                img = cv2.resize(img, (dimx, dimy))
                # parse person
                fname = os.path.split(m.name)[-1]
                fname_splitted = fname[:-4].replace('_', ' ').split()
                person_id = ' '.join(fname_splitted[:-1])
                photo_number = int(fname_splitted[-1])
                if (person_id, photo_number) in imgs_with_attrs:
                    all_photos.append(img)
                    photo_ids.append({
                        'person': person_id,
                        'imagenum': photo_number
                    })

    photo_ids = pd.DataFrame(photo_ids)
    all_photos = np.stack(all_photos).astype('uint8')

    # preserve photo_ids order!
    all_attrs = photo_ids.merge(df_attrs, on=('person', 'imagenum'))
    all_attrs = all_attrs.drop(["person", "imagenum"], axis=1)

    return all_photos, all_attrs
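
A hypothetical call (assumes the *_NAME constants are set; the shapes shown are indicative):

# hypothetical usage of load_lfw_dataset
all_photos, all_attrs = load_lfw_dataset(use_raw=False, dimx=45, dimy=45)
print(all_photos.shape)        # (n_images, 45, 45, 3)
print(all_attrs.columns[:5])   # first few facial-attribute columns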
Example #8
    # inner worker thread; `q`, `kill_read_thread`, `read_thread_completed`,
    # `extensions`, `input_shape` and `preprocess_for_model` are closed over
    # from the enclosing scope
    def reading_thread(zip_fn):
        zf = zipfile.ZipFile(zip_fn)
        for fn in tqdm_utils.tqdm_notebook_failsafe(zf.namelist()):
            if kill_read_thread.is_set():
                break
            if os.path.splitext(fn)[-1] in extensions:
                buf = zf.read(fn)  # read raw bytes from zip for fn
                img = decode_image_from_buf(buf)  # decode raw bytes
                img = crop_and_preprocess(img, input_shape,
                                          preprocess_for_model)
                while True:
                    try:
                        q.put((os.path.split(fn)[-1], img),
                              timeout=1)  # put in queue
                    except queue.Full:
                        # queue stayed full for 1s: re-check the kill flag, then retry
                        if kill_read_thread.is_set():
                            break
                        continue
                    break  # put succeeded

        read_thread_completed.set()  # read all images
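
A hypothetical consumer that could sit on the other end of `q` (the queue size, event wiring and archive name are assumptions):

# hypothetical consumer pairing with reading_thread
import queue
import threading

q = queue.Queue(maxsize=32)
kill_read_thread = threading.Event()
read_thread_completed = threading.Event()

t = threading.Thread(target=reading_thread, args=("images.zip",))
t.start()
while not (read_thread_completed.is_set() and q.empty()):
    try:
        fn, img = q.get(timeout=1)
    except queue.Empty:
        continue
    ...  # consume (fn, img), e.g. feed it to a model
t.join()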
Example #9
# # Submit to Coursera

# In[25]:


# token expires every 30 min
COURSERA_TOKEN = "*****************"
COURSERA_EMAIL = "d****************m"


# In[26]:


from submit import submit_char_rnn
samples = [generate_sample(' Al') for i in tqdm_utils.tqdm_notebook_failsafe(range(25))]
submission = (history, samples)
submit_char_rnn(submission, COURSERA_EMAIL, COURSERA_TOKEN)


# # Try it out!
# 
# __Disclaimer:__ This part of the assignment is entirely optional. You won't receive bonus points for it. However, it's a fun thing to do. Please share your results on the course forums.
# 
# You've just implemented a recurrent language model that can be tasked with generating any kind of sequence, so there's plenty of data you can try it on:
# 
# * Novels/poems/songs of your favorite author
# * News titles/clickbait titles
# * Source code of Linux or Tensorflow
# * Molecules in [smiles](https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system) format
# * Melody in notes/chords format
Example #10
# you can load "weights_{epoch}" and continue training
# uncomment the next line if you need to load weights
# saver.restore(s, os.path.abspath("weights"))
"""Look at the training and validation loss, they should be decreasing!"""

# actual training loop
MAX_LEN = 20  # truncate long captions to speed up training

# to make training reproducible
np.random.seed(42)
random.seed(42)

for epoch in range(n_epochs):

    train_loss = 0
    pbar = tqdm_utils.tqdm_notebook_failsafe(range(n_batches_per_epoch))
    counter = 0
    for _ in pbar:
        train_loss += s.run([decoder.loss, train_step],
                            generate_batch(train_img_embeds,
                                           train_captions_indexed, batch_size,
                                           MAX_LEN))[0]
        counter += 1
        pbar.set_description("Training loss: %f" % (train_loss / counter))

    train_loss /= n_batches_per_epoch

    val_loss = 0
    for _ in range(n_validation_batches):
        val_loss += s.run(
            decoder.loss,
            generate_batch(val_img_embeds, val_captions_indexed,
                           batch_size, MAX_LEN))
    val_loss /= n_validation_batches
Example #11
def sample_probas(bsize):
    plt.title('Generated vs real data')
    # the discriminator outputs log-probabilities, hence the np.exp
    plt.hist(np.exp(discriminator.predict(sample_data_batch(bsize)))[:, 1],
             label='D(x)', alpha=0.5, range=[0, 1])
    plt.hist(np.exp(discriminator.predict(generator.predict(sample_noise_batch(bsize))))[:, 1],
             label='D(G(z))', alpha=0.5, range=[0, 1])
    plt.legend(loc='best')
    plt.show()

"""### Training
Main loop.
We just train generator and discriminator in a loop and plot results once every N iterations.
"""

from IPython import display
for epoch in tqdm_utils.tqdm_notebook_failsafe(range(3000)):

    feed_dict = {
        real_data: sample_data_batch(100),
        noise: sample_noise_batch(100)
    }

    # 5 discriminator updates per generator update
    for i in range(5):
        s.run(disc_optimizer, feed_dict)

    s.run(gen_optimizer, feed_dict)

    if epoch % 100 == 0:
        display.clear_output(wait=True)
        sample_images(2, 3, True)
        sample_probas(1000)
Example #12
for _ in range(10):
    print(generate_sample())

# with prefix conditioning
for _ in range(10):
    print(generate_sample(' Trump'))
"""# Submit to Coursera"""

# token expires every 30 min
COURSERA_TOKEN = "XXXXXXXXXXXX"
COURSERA_EMAIL = "XXXXXXXXXXXXXXXXXXXXXXXXXX"

from submit import submit_char_rnn
samples = [
    generate_sample(' Al')
    for i in tqdm_utils.tqdm_notebook_failsafe(range(25))
]
submission = (history, samples)
submit_char_rnn(submission, COURSERA_EMAIL, COURSERA_TOKEN)
"""# Try it out!
__Disclaimer:__ This part of the assignment is entirely optional. You won't receive bonus points for it. However, it's a fun thing to do. Please share your results on the course forums.
You've just implemented a recurrent language model that can be tasked with generating any kind of sequence, so there's plenty of data you can try it on:
* Novels/poems/songs of your favorite author
* News titles/clickbait titles
* Source code of Linux or Tensorflow
* Molecules in [smiles](https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system) format
* Melody in notes/chords format
* IKEA catalog titles
* Pokemon names
* Cards from Magic: The Gathering / Hearthstone
If you're willing to give it a try, here's what you wanna look at: