Ejemplo n.º 1
0
    def __init__(self,
                 csv_path=None,
                 img_path=None,
                 transform=None,
                 mode='regression',
                 sim_threshold=0.8):
        """Load the reference CSV and binarize its ``labels`` column.

        Args:
            csv_path: Path of the reference CSV. When ``None`` it falls back
                to ``BASE_PATH + '/asos.csv'``.
            img_path: Image directory. When ``None`` it falls back to
                ``BASE_PATH + '/asos_images'``.
            transform: Stored unchanged on ``self.transform``.
            mode: Stored unchanged on ``self.mode``.
            sim_threshold: Stored unchanged on ``self.sim_threshold``.
        """
        resolved_csv = (csv_path if csv_path is not None
                        else BASE_PATH + '/asos.csv')
        resolved_img = (img_path if img_path is not None
                        else BASE_PATH + '/asos_images')

        self.mode = mode
        self.sim_threshold = sim_threshold

        self.mlb = MultiLabelBinarizer()
        self.transform = transform

        reference = self.read_reference_dataset(resolved_csv)
        self.reference_dataset = reference

        self.iids = reference['iid']
        self.X_train = reference['destination_path']

        logger.INFO("Fit/Transform MultiLabelBinarizer")
        binarized = self.mlb.fit_transform(reference['labels'])
        self.labels = binarized.astype(np.float32)

        n_labels = len(self.mlb.classes_.tolist())
        logger.INFO("Number of unique labels %s" % n_labels)

        self.img_path = resolved_img
Ejemplo n.º 2
0
def cli(ctx, config, train):
    """Build an ``AsosSiameseTrainer`` from a configuration file and run it.

    Args:
        ctx: Command context; not used by this function.
        config: Path handed to ``exp.read_experiment_configuration``.
        train: Forwarded to the trainer's ``run`` as ``training``.
    """
    logger.INFO("Managing TagWalk Models")
    trainer_name = AsosSiameseTrainer.__name__
    logger.INFO("Using model: %s" % trainer_name)

    settings = exp.read_experiment_configuration(config)

    # The value returned by run() is intentionally discarded.
    AsosSiameseTrainer(settings).run(training=train)
Ejemplo n.º 3
0
def cli(ctx, model, config, train):
    """Pick a TagWalk model class, configure it and run it.

    Args:
        ctx: Command context; not used by this function.
        model: ``'cnn'`` selects ``TagWalkClassifier``; any other value
            selects ``TagWalkCNNRNN``.
        config: Path handed to ``exp.read_experiment_configuration``.
        train: Forwarded to the engine's ``run`` as ``training``.
    """
    logger.INFO("Managing TagWalk Models")

    model_cls = TagWalkClassifier if model == 'cnn' else TagWalkCNNRNN
    logger.INFO("Using model: %s" % model_cls.__name__)

    settings = exp.read_experiment_configuration(config)

    # The value returned by run() is intentionally discarded.
    model_cls(settings).run(training=train)
Ejemplo n.º 4
0
def read_experiment_configuration(config_path):
    """Read an experiment configuration from a JSON file.

    The file is parsed directly with ``json.load``. Whitespace and newlines
    between JSON tokens are ignored by the parser, so there is no need to
    strip them beforehand; stripping every space would also corrupt string
    values that legitimately contain spaces (e.g. paths or descriptions).

    Args:
        config_path: Path of the JSON configuration file.

    Returns:
        The decoded JSON document (typically a ``dict``).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(config_path, 'r') as config_file:
        configuration = json.load(config_file)

    logger.INFO("Using Configuration %s" % configuration)
    return configuration
Ejemplo n.º 5
0
    def save_images(self, dirname='asos_images/'):
        """Download the images referenced by every row of ``self.df``.

        For each row the URLs in ``row['images']`` are de-duplicated, every
        ``$S$`` in the URL is replaced by ``$XXL$``, and the result is saved
        as ``<self.img_directory><iid>__<n>.jpg``. Files already present on
        disk are not downloaded again. A per-item status table is written to
        ``<self.img_directory>__meta.csv`` and kept on
        ``self.image_statuses_df``; ``status`` is ``True`` when downloading
        that item raised an error.

        Args:
            dirname: Unused; kept so existing callers keep working. Images
                are written under ``self.img_directory``.
        """
        # TODO: Generalise for reuse
        l.INFO('Saving images to: %s' % (self.img_directory))

        def dl_url(row):
            """Fetch all images of one row; return True if anything failed."""
            output_path = self.img_directory + str(row.iid)
            try:
                for i, url in enumerate(set(row['images'])):
                    url = url.replace("$S$", "$XXL$")
                    img_path = output_path + '__' + str(i) + '.jpg'
                    if not os.path.isfile(img_path):
                        # URLopener has been deprecated since Python 3.3;
                        # urlretrieve performs the same URL-to-file copy.
                        urllib.request.urlretrieve(url, img_path)
                return False
            except Exception as e:
                # Deliberately broad: one failing item must not abort the
                # whole crawl, it is only recorded as a miss.
                l.ERROR("%s --> %s" % (e, output_path))
                return True

        misses = {row.iid: dl_url(row) for _, row in self.df.iterrows()}

        # Materialise the dict views so the columns are plain lists.
        misses_df = pd.DataFrame({
            'id': list(misses.keys()),
            'status': list(misses.values()),
        })
        # NOTE(review): the original wrote to an undefined name `path`;
        # the images directory prefix is assumed to be the intended target.
        misses_df.to_csv(self.img_directory + '__meta.csv', index=False)
        self.image_statuses_df = misses_df
Ejemplo n.º 6
0
    def flatten_images_directory(self):
        """Copy each referenced image from its origin to its destination.

        Calls ``self.build_all_images_dir()`` first, then copies the file at
        ``origin_path`` to ``destination_path`` for every row of
        ``self.ref_dataset``.
        """
        tqdm_pandas(tqdm())
        logger.INFO("Flattening images data")
        self.build_all_images_dir()

        def copy_row(row):
            """Copy the image described by a single reference row."""
            return copyfile(row['origin_path'], row['destination_path'])

        self.ref_dataset.progress_apply(copy_row, axis=1)
Ejemplo n.º 7
0
def cli(ctx, package, push, pull, clean, sync, archive_name, data_dir):
    """Dispatch the project-data management actions selected by the flags.

    Actions run in this order: package, push archive (only when not
    syncing), clean, sync to S3 (``sync`` and ``push``), sync from S3
    (``sync`` and ``pull``).

    Args:
        ctx: Command context; not used by this function.
        package: Package ``data_dir`` into ``archive_name`` when truthy.
        push: Push the archive, or sync to S3 when ``sync`` is also set.
        pull: Sync from S3 when ``sync`` is also set.
        clean: Clean the archive when truthy.
        sync: Switch ``push``/``pull`` to the S3 sync variants.
        archive_name: Name of the archive to package/push/clean.
        data_dir: Ignored; always replaced by ``configuration.BASE_DATA``.
    """
    logger.INFO("Managing Project data")

    # NOTE(review): the caller-supplied data_dir is overwritten here --
    # confirm whether that is intentional.
    data_dir = configuration.BASE_DATA
    logger.INFO("Using: %s" % (data_dir))

    if package:
        package_data(archive_name, data_dir)

    if push and not sync:
        push_archive(archive_name)

    if clean:
        clean_archive(archive_name)

    if sync:
        if push:
            sync_to_s3_data(data_dir)
        if pull:
            sync_from_s3_data(data_dir)
Ejemplo n.º 8
0
    def prepare(self, df=True, labels=True, images=True):
        """Build missing data and persist the requested artefacts.

        ``self.df`` and ``self.labels`` are built first when they are
        ``None``; each flag then controls one save step.

        Args:
            df: Write ``self.df`` to ``<output_dir>/fashionista.csv``.
            labels: Call ``self.save_labels()``.
            images: Call ``self.save_images()``.
        """
        if self.df is None:
            self.df = self.build()
        if self.labels is None:
            self.labels = self.build_labels()

        if df:
            csv_path = '/'.join((self.output_dir, 'fashionista.csv'))
            l.INFO('Saving to: %s' % csv_path)
            self.df.to_csv(csv_path)

        if labels:
            self.save_labels()

        if images:
            self.save_images()
Ejemplo n.º 9
0
    def validate(self, epoch):
        """Run one pass over the validation loader and return the mean loss.

        Every batch goes through ``self.on_batch_data`` in validation mode;
        its metrics are added to the history. The model is saved when
        ``self.must_save`` approves the resulting mean loss.

        Args:
            epoch: Current epoch, forwarded to ``self.must_save``.

        Returns:
            Mean of the per-batch losses.
        """
        progress = tqdm(iter(self.loader_dict['validation']))

        self.set_eval()

        batch_losses = []
        for batch in progress:
            _, batch_loss, batch_metrics = self.on_batch_data(
                batch, mode='validation')
            self.update_history_metrics(batch_metrics)
            # NOTE(review): ``.data[0]`` looks like the pre-0.4 PyTorch way
            # of reading a scalar loss -- confirm against the torch version.
            batch_losses.append(batch_loss.data[0])
            self.show_debug(batch)

        mean_loss = np.mean(np.array(batch_losses))
        should_save = self.must_save(epoch, mean_loss)
        if should_save:
            logger.INFO("Saving for validation loss %s" % (mean_loss))
            self.save_model()
        return mean_loss
Ejemplo n.º 10
0
    def build_reference_dataset(self):
        """Assemble the reference DataFrame from the crawled images.

        Every image of every label in ``self.crawl_memory`` is tagged with
        its label and the type ``'original'`` and passed to
        ``self.fill_ref_dict``. The resulting ``self.ref_dict`` becomes a
        DataFrame that gains an ``origin_path`` and a ``destination_path``
        column.

        Returns:
            The reference DataFrame with a fresh index.
        """
        logger.INFO("Building reference dataset")

        for label in tqdm(self.labels):
            for image in self.crawl_memory[label]['images']:
                image['label'] = label
                image['type'] = 'original'
                self.fill_ref_dict(image)

        def ascii_slug(name):
            """Lower-case, underscore the spaces and drop non-ASCII chars."""
            lowered = name.lower().replace(' ', '_')
            return lowered.encode('ascii', 'ignore').decode('ascii')

        ref_df = pd.DataFrame.from_dict(self.ref_dict, orient='columns')

        path_prefixes = ref_df['path'].apply(get_image_path_prefix)
        ref_df['origin_path'] = self.data_dir + path_prefixes

        name_slugs = ref_df['name'].apply(ascii_slug)
        ref_df['destination_path'] = (
            self.all_images_dir + ref_df['designer'] + '__' +
            ref_df['season'] + '__' + name_slugs)

        return ref_df.reset_index(drop=True)
Ejemplo n.º 11
0
    def prepare(self, df=True, labels=True, images=True, reset=False):
        """Load or build the ASOS data and persist the requested artefacts.

        When ``self.asos_path`` exists and ``reset`` is false the DataFrame
        is read back from disk (and its head printed); otherwise it is built
        only if ``self.df`` is still ``None``. Labels are built when missing.

        Args:
            df: Write ``self.df`` to ``self.asos_path``.
            labels: Call ``self.save_labels()``.
            images: Call ``self.save_images()``.
            reset: Ignore an existing file at ``self.asos_path``.
        """
        reuse_saved = os.path.isfile(self.asos_path) and not reset
        if reuse_saved:
            self.df = self.read_asos_df()
            print(self.df.head())
        elif self.df is None:
            self.df = self.build()

        if self.labels is None:
            self.labels = self.build_labels()

        if df:
            l.INFO('Saving to: %s' % (self.asos_path))
            self.df.to_csv(self.asos_path)

        if labels:
            self.save_labels()

        if images:
            self.save_images()
Ejemplo n.º 12
0
def _execute(command):
    """Log ``command`` and return whatever ``execute(command)`` returns."""
    logger.INFO(command)
    outcome = execute(command)
    return outcome
Ejemplo n.º 13
0
 def read_model(self):
     """Restore every sub-model and the history from ``self.chk_filename``.

     The checkpoint is expected to hold a ``'state_dict'`` entry keyed like
     ``self.model`` and a ``'history'`` entry.
     """
     logger.INFO("Trying to load %s" % (self.chk_filename))
     checkpoint = torch.load(self.chk_filename)
     for name in self.model:
         saved_state = checkpoint['state_dict'][name]
         self.model[name].load_state_dict(saved_state)
     self.history = checkpoint['history']
Ejemplo n.º 14
0
 def must_save(self, epoch, loss):
     """Decide whether the model should be saved after a validation pass.

     The recorded ``val_loss`` history is smoothed with a rolling mean whose
     window is ``self.batch_size``. The model is saved on epoch ``0`` or
     when the latest rolling mean is not greater than the previous one.

     Args:
         epoch: Current epoch index; epoch ``0`` always saves.
         loss: Unused; kept so existing callers keep working.

     Returns:
         bool: ``True`` when the caller should save the model.
     """
     val_losses = self.history['metrics']['val_loss']
     if len(val_losses) > 0:
         losses_df = pd.DataFrame({'loss': val_losses})
         means = losses_df['loss'].rolling(self.batch_size).mean().tolist()
     else:
         means = []

     # Pad with NaN so a history shorter than two entries cannot raise
     # IndexError. Comparisons against NaN are False, which matches how an
     # unfilled rolling window already behaves.
     nan = float('nan')
     previous, last = ([nan, nan] + means)[-2:]

     logger.INFO("Last mean val_loss: %s %s" % (last, previous))
     return (epoch == 0 or last <= previous)
Ejemplo n.º 15
0
def managers():
    """Log that a management command was invoked."""
    message = "Managment Command Detected"
    l.INFO(message)
Ejemplo n.º 16
0
def main(**kwargs):
    """Log the start-up banner; the keyword arguments are not used here."""
    banner = "Starting TagWalk"
    l.INFO(banner)
Ejemplo n.º 17
0
 def read_reference_dataset(self, csv_path):
     """Read the reference CSV and derive its ``labels`` column.

     Args:
         csv_path: Path of the CSV file to read.

     Returns:
         The DataFrame with an added ``labels`` column, obtained by applying
         ``str_to_array`` to every value of ``attributes``.
     """
     logger.INFO("Reading reference_dataset")
     reference = pd.read_csv(csv_path)
     parsed_attributes = reference['attributes'].apply(str_to_array)
     reference['labels'] = parsed_attributes
     return reference
Ejemplo n.º 18
0
def cli(ctx, df, images):
    """Prepare the TagWalk data.

    Args:
        ctx: Command context; not used by this function.
        df: Forwarded to ``TagWalk.prepare`` as ``df``.
        images: Forwarded to ``TagWalk.prepare`` as ``images``.
    """
    logger.INFO("Preparing TagWalk data")
    TagWalk().prepare(df=df, images=images)
Ejemplo n.º 19
0
def modeling():
    """Log that a model command was invoked."""
    message = "Model Command Detected"
    l.INFO(message)
Ejemplo n.º 20
0
def cli(ctx, df, labels, images):
    """Prepare the Fashionista data.

    Args:
        ctx: Command context; not used by this function.
        df: Forwarded to ``Fashionista.prepare`` as ``df``.
        labels: Forwarded to ``Fashionista.prepare`` as ``labels``.
        images: Forwarded to ``Fashionista.prepare`` as ``images``.
    """
    # The message previously named ASOS although this command prepares the
    # Fashionista dataset, which made the logs misleading.
    l.INFO("Preparing Fashionista data")
    prep = Fashionista()
    prep.prepare(df=df, labels=labels, images=images)
Ejemplo n.º 21
0
 def save_images(self, dirname='fashionista_images/'):
     """Log the directory the images are saved to.

     Args:
         dirname: Sub-directory joined to ``self.output_dir`` with ``/``.
     """
     target_dir = '/'.join((self.output_dir, dirname))
     l.INFO('Saving images to: %s' % target_dir)
Ejemplo n.º 22
0
def cli(ctx, reset, df, labels, images):
    """Prepare the ASOS data.

    Args:
        ctx: Command context; not used by this function.
        reset: Passed to ``Asos`` as ``build`` and to ``prepare`` as
            ``reset``.
        df: Forwarded to ``Asos.prepare`` as ``df``.
        labels: Forwarded to ``Asos.prepare`` as ``labels``.
        images: Forwarded to ``Asos.prepare`` as ``images``.
    """
    logger.INFO("Preparing ASOS data")
    asos = Asos(build=reset)
    asos.prepare(df=df, labels=labels, images=images, reset=reset)
Ejemplo n.º 23
0
def cli(ctx, df, labels, images):
    """Prepare the PaperDoll data.

    Args:
        ctx: Command context; not used by this function.
        df: Forwarded to ``PaperDoll.prepare`` as ``df``.
        labels: Forwarded to ``PaperDoll.prepare`` as ``labels``.
        images: Forwarded to ``PaperDoll.prepare`` as ``images``.
    """
    # The message previously named ASOS although this command prepares the
    # PaperDoll dataset, which made the logs misleading.
    l.INFO("Preparing PaperDoll data")
    prep = PaperDoll()
    prep.prepare(df=df, labels=labels, images=images)
Ejemplo n.º 24
0
def builders():
    """Log that a builder command was invoked."""
    message = "Builder Command Detected"
    l.INFO(message)