def __init__(self, csv_path=None, img_path=None, transform=None, mode='regression', sim_threshold=0.8):
    """Initialise the dataset: read the reference CSV and binarize its labels.

    Parameters
    ----------
    csv_path : str, optional
        Reference CSV location; defaults to BASE_PATH + '/asos.csv'.
    img_path : str, optional
        Image directory; defaults to BASE_PATH + '/asos_images'.
    transform : callable, optional
        Per-sample transform applied downstream.
    mode : str
        Dataset mode tag (default 'regression').
    sim_threshold : float
        Similarity threshold stored for later use.
    """
    # Fall back to the project's default data locations when none are given.
    if csv_path is None:
        csv_path = BASE_PATH + '/asos.csv'
    if img_path is None:
        img_path = BASE_PATH + '/asos_images'
    self.mode = mode
    self.sim_threshold = sim_threshold
    self.transform = transform
    self.img_path = img_path
    self.mlb = MultiLabelBinarizer()
    self.reference_dataset = self.read_reference_dataset(csv_path)
    self.iids = self.reference_dataset['iid']
    self.X_train = self.reference_dataset['destination_path']
    logger.INFO("Fit/Transform MultiLabelBinarizer")
    binarized = self.mlb.fit_transform(self.reference_dataset['labels'])
    self.labels = binarized.astype(np.float32)
    logger.INFO("Number of unique labels %s" % (len(self.mlb.classes_.tolist())))
def cli(ctx, config, train):
    """CLI entry point: build an AsosSiameseTrainer from *config* and run it."""
    logger.INFO("Managing TagWalk Models")
    logger.INFO("Using model: %s" % AsosSiameseTrainer.__name__)
    trainer = AsosSiameseTrainer(exp.read_experiment_configuration(config))
    _ = trainer.run(training=train)
def cli(ctx, model, config, train):
    """CLI entry point: select a model class by name, configure it, and run."""
    logger.INFO("Managing TagWalk Models")
    # 'cnn' selects the plain classifier; anything else gets the CNN-RNN model.
    model_cls = TagWalkClassifier if model == 'cnn' else TagWalkCNNRNN
    logger.INFO("Using model: %s" % model_cls.__name__)
    configuration = exp.read_experiment_configuration(config)
    _ = model_cls(configuration).run(training=train)
def read_experiment_configuration(config_path):
    """Load an experiment configuration from a JSON file.

    Parameters
    ----------
    config_path : str
        Path to a JSON configuration file.

    Returns
    -------
    dict
        The parsed configuration.
    """
    # BUG FIX: the previous implementation stripped every newline AND every
    # space from the raw text before parsing, which corrupted any JSON string
    # value containing a space.  json.load handles whitespace correctly.
    with open(config_path, 'r') as config_file:
        configuration = json.load(config_file)
    logger.INFO("Using Configuration %s" % configuration)
    return configuration
def save_images(self, dirname='asos_images/'):
    """Download every product image referenced in ``self.df``.

    For each row, each unique image URL is fetched (the small '$S$' variant
    is upgraded to '$XXL$') and stored under ``self.img_directory`` as
    '<iid>__<index>.jpg'.  Per-row failure status (True = failed) is
    collected into ``self.image_statuses_df`` and written to a CSV.
    """
    # TODO: Generalise for reuse
    l.INFO('Saving images to: %s' % (self.img_directory))
    misses = {}

    def dl_url(row):
        """Download all images for one row; return True on failure."""
        output_path = (self.img_directory + str(row.iid))
        try:
            images_urls = list(set(row['images']))
            for i, url in enumerate(images_urls):
                # Request the extra-large rendition instead of the thumbnail.
                url = url.replace("$S$", "$XXL$")
                img_path = (output_path + '__' + str(i) + '.jpg')
                if not os.path.isfile(img_path):
                    # urlretrieve replaces the long-deprecated URLopener class.
                    urllib.request.urlretrieve(url, img_path)
            return False
        except Exception as e:
            l.ERROR("%s --> %s" % (e, output_path))
            return True

    for _, row in self.df.iterrows():
        misses[row.iid] = dl_url(row)

    misses_df = pd.DataFrame({
        'id': list(misses.keys()),
        'status': list(misses.values()),
    })
    # BUG FIX: 'path' was undefined here (NameError).  Write the status file
    # next to the images instead.  NOTE(review): confirm intended location.
    misses_df.to_csv(self.img_directory + '__meta.csv', index=False)
    self.image_statuses_df = misses_df
def flatten_images_directory(self):
    """Copy every referenced image into the single flat images directory."""
    tqdm_pandas(tqdm())
    logger.INFO("Flattening images data")
    self.build_all_images_dir()

    def _copy_row(row):
        # Side effect only: duplicate the file at its flattened destination.
        copyfile(row['origin_path'], row['destination_path'])

    self.ref_dataset.progress_apply(_copy_row, axis=1)
def cli(ctx, package, push, pull, clean, sync, archive_name, data_dir):
    """CLI entry point for project-data management (package/push/pull/sync)."""
    logger.INFO("Managing Project data")
    # BUG FIX: data_dir used to be unconditionally overwritten, making the
    # CLI option dead.  Respect a caller-supplied directory and only fall
    # back to the configured default when none is given.
    if not data_dir:
        data_dir = configuration.BASE_DATA
    logger.INFO("Using: %s" % (data_dir))
    if package:
        package_data(archive_name, data_dir)
    # A plain --push uploads the archive; combined with --sync it becomes
    # a full directory sync instead (handled below).
    if push and not sync:
        push_archive(archive_name)
    if clean:
        clean_archive(archive_name)
    if sync and push:
        sync_to_s3_data(data_dir)
    if sync and pull:
        sync_from_s3_data(data_dir)
def prepare(self, df=True, labels=True, images=True):
    """Build the dataframe/labels if missing, then persist the requested parts."""
    # Lazily materialise anything that has not been built yet.
    if self.df is None:
        self.df = self.build()
    if self.labels is None:
        self.labels = self.build_labels()
    if not df:
        pass
    else:
        path = '/'.join([self.output_dir, 'fashionista.csv'])
        l.INFO('Saving to: %s' % (path))
        self.df.to_csv(path)
    if labels:
        self.save_labels()
    if images:
        self.save_images()
def validate(self, epoch):
    """Run one validation pass over the validation loader.

    Updates history metrics per batch, saves the model when the epoch's
    mean loss qualifies (see ``must_save``), and returns that mean loss.
    """
    dataset = iter(self.loader_dict['validation'])
    pbar = tqdm(dataset)
    self.set_eval()
    losses = []
    for _, batch_data in enumerate(pbar):
        _, loss, metrics = self.on_batch_data(batch_data, mode='validation')
        self.update_history_metrics(metrics)
        # BUG FIX: loss.data[0] raises on 0-dim tensors in PyTorch >= 0.4;
        # .item() is the supported way to extract a Python scalar.
        losses.append(loss.item())
        self.show_debug(batch_data)
    mean_loss = np.mean(np.array(losses))
    if self.must_save(epoch, mean_loss):
        logger.INFO("Saving for validation loss %s" % (mean_loss))
        self.save_model()
    return mean_loss
def build_reference_dataset(self):
    """Assemble the reference DataFrame mapping crawled images to flat paths.

    Tags each crawled image dict with its label, accumulates rows via
    ``self.fill_ref_dict``, then derives 'origin_path' and
    'destination_path' columns for the later copy into the flat
    images directory.

    Returns
    -------
    pandas.DataFrame
        Reference dataset with a fresh integer index.
    """
    logger.INFO("Building reference dataset")
    for label in tqdm(self.labels):
        images = self.crawl_memory[label]['images']
        for image in images:
            # Annotate each image record in place before accumulating it.
            image['label'] = label
            image['type'] = 'original'
            self.fill_ref_dict(image)
    ref_df = pd.DataFrame.from_dict(self.ref_dict, orient='columns')
    ref_df['origin_path'] = (ref_df['path'].apply(get_image_path_prefix))
    ref_df['origin_path'] = self.data_dir + ref_df['origin_path']
    # Destination name: designer__season__name, lower-cased, spaces -> '_',
    # non-ASCII characters dropped.  NOTE(review): no file extension or
    # per-image index is appended here — confirm downstream expectations.
    ref_df['destination_path'] = (
        self.all_images_dir + ref_df['designer'] + '__' +
        ref_df['season'] + '__' +
        ref_df['name'].apply(lambda x: x.lower(
        ).replace(' ', '_').encode('ascii', 'ignore').decode('ascii')))
    return ref_df.reset_index(drop=True)
def prepare(self, df=True, labels=True, images=True, reset=False):
    """Reuse the cached ASOS dataframe unless *reset*, then persist outputs."""
    cached = os.path.isfile(self.asos_path)
    if cached and not reset:
        # A previously built dataframe exists on disk — load it instead
        # of re-crawling.
        self.df = self.read_asos_df()
        print(self.df.head())
    else:
        if self.df is None:
            self.df = self.build()
        if self.labels is None:
            self.labels = self.build_labels()
    if df:
        l.INFO('Saving to: %s' % (self.asos_path))
        self.df.to_csv(self.asos_path)
    if labels:
        self.save_labels()
    if images:
        self.save_images()
def _execute(command):
    """Log *command*, run it via ``execute``, and return the result."""
    logger.INFO(command)
    result = execute(command)
    return result
def read_model(self):
    """Restore each sub-model's weights and the training history from disk."""
    logger.INFO("Trying to load %s" % (self.chk_filename))
    checkpoint = torch.load(self.chk_filename)
    # self.model maps names to modules; each has its own saved state dict.
    state_dicts = checkpoint['state_dict']
    for name in self.model:
        self.model[name].load_state_dict(state_dicts[name])
    self.history = checkpoint['history']
def must_save(self, epoch, loss):
    """Decide whether the current checkpoint should be written.

    Saves on the first epoch (or while fewer than two rolling means are
    available), otherwise when the rolling mean of the validation loss is
    no worse than the previous rolling mean.
    """
    losses_df = pd.DataFrame({'loss': self.history['metrics']['val_loss']})
    losses_df['index'] = losses_df.index
    means = losses_df['loss'].rolling(self.batch_size).mean().tolist()
    # BUG FIX: with fewer than two recorded losses, means[-2] raised an
    # IndexError (the log line crashed even when epoch == 0 would have
    # short-circuited the return).  Guard before touching means[-2].
    if epoch == 0 or len(means) < 2:
        return True
    logger.INFO("Last mean val_loss: %s %s" % (means[-1], means[-2]))
    return means[-1] <= means[-2]
def managers():
    """Click group for management commands."""
    # BUG FIX: corrected "Managment" typo in the log message.
    l.INFO("Management Command Detected")
def main(**kwargs):
    """Top-level entry point: announce application start-up."""
    l.INFO("Starting TagWalk")
def read_reference_dataset(self, csv_path):
    """Read the reference CSV and parse its 'attributes' column into labels."""
    logger.INFO("Reading reference_dataset")
    frame = pd.read_csv(csv_path)
    frame['labels'] = frame['attributes'].apply(str_to_array)
    return frame
def cli(ctx, df, images):
    """CLI entry point: build and persist the TagWalk dataset."""
    logger.INFO("Preparing TagWalk data")
    TagWalk().prepare(df=df, images=images)
def modeling():
    """Click group for modelling commands."""
    l.INFO("Model Command Detected")
def cli(ctx, df, labels, images):
    """CLI entry point: build and persist the Fashionista dataset."""
    # BUG FIX: the log message said "ASOS" but this command prepares the
    # Fashionista dataset (copy-paste error from the ASOS command).
    l.INFO("Preparing Fashionista data")
    prep = Fashionista()
    prep.prepare(df=df, labels=labels, images=images)
def save_images(self, dirname='fashionista_images/'):
    """Log the target image directory.

    NOTE(review): this override only logs — no download/copy happens here.
    """
    target = '/'.join([self.output_dir, dirname])
    l.INFO('Saving images to: %s' % (target))
def cli(ctx, reset, df, labels, images):
    """CLI entry point: build and persist the ASOS dataset."""
    logger.INFO("Preparing ASOS data")
    Asos(build=reset).prepare(df=df, labels=labels, images=images, reset=reset)
def cli(ctx, df, labels, images):
    """CLI entry point: build and persist the PaperDoll dataset."""
    # BUG FIX: the log message said "ASOS" but this command prepares the
    # PaperDoll dataset (copy-paste error from the ASOS command).
    l.INFO("Preparing PaperDoll data")
    prep = PaperDoll()
    prep.prepare(df=df, labels=labels, images=images)
def builders():
    """Click group for dataset-builder commands."""
    l.INFO("Builder Command Detected")