Example #1
0
def write_all_unique_key_value_cnt(q: DatasetQuery):
    """Export, for categories 1-5, a CSV listing every (attr key, attr value, count) triple.

    Each output file is written to ./analysis/unique_attr_key_val_cnt<i>.csv.
    """
    analysis_path = "./analysis/"
    for i in range(1, 6):
        # cnt maps attr_key -> Counter (or dict) of attr_value -> occurrence count
        # — presumably; verify against get_all_unique_key_value_attrs.
        cnt = q.get_all_unique_key_value_attrs(i)
        # Flatten the nested mapping into [key, value, count] rows for CSV export.
        rows = [
            [key, attr, count]
            for key, attr_obs in cnt.items()
            for attr, count in attr_obs.items()
        ]
        name = f"{analysis_path}unique_attr_key_val_cnt{i}.csv"
        DatasetQuery.export_csv(rows, name)
Example #2
0
def write_all_data(q: DatasetQuery):
    """Export every category's unique attribute keys as both a .txt and a .csv file.

    For categories 1-5 this writes ./analysis/Unique_attrs_cat<i>.txt and
    ./analysis/unique_attr_cnt<i>.csv.
    """
    analysis_path = "./analysis/"
    for i in range(1, 6):
        # Fetch once per category instead of re-querying for each output format.
        dd = q.get_all_unique_key_attributes(i)
        DatasetQuery.export_txt(dd, f"{analysis_path}Unique_attrs_cat{i}.txt")
        # dd appears to be a mapping (it supports .items()); the CSV gets
        # (key, value) rows.
        DatasetQuery.export_csv(dd.items(), f"{analysis_path}unique_attr_cnt{i}.csv")
Example #3
0
def get_dataset_query():
    """Return a DatasetQuery wrapping the default dataset."""
    return DatasetQuery(get_dataset())
Example #4
0
def print_random_data(q: DatasetQuery, category: int):
    """Print a random record from *category* and return its primary image."""
    record = q.get_random_data(category)
    print(record)
    return record.get_image(record.primary_image_url)
Example #5
0
            for key_attr, val_attrs in data.attributes.items():
                sentences_matrix[key_attr].append(val_attrs)
            pbar.update(1)
    print("Done!")

    print("Creating Word2Vec models from globs...")
    with tqdm(total=len(sentences_matrix.items())) as pbar:
        for key, sentences in sentences_matrix.items():
            models[key] = Word2Vec(sentences, min_count=1)
            pbar.update(1)
    print("Done!")


if __name__ == '__main__':
    # Load the dataset and wrap it in the query helper.
    d = get_dataset()
    q = DatasetQuery(d)
    print("Extracting every key to key_list and build model vector...")
    # Cluster only the N most frequent attribute keys.
    n_first_key_to_cluster = 7
    # NOTE(review): the [:, 0] slice implies the return is a 2-D numpy-style
    # array whose first column holds the key names — confirm against
    # get_most_frequent_keys.
    key_list = q.get_most_frequent_keys(1, n_first_key_to_cluster)[:, 0]
    # models.keys() is
    # dict_keys(['brand', 'inseam', 'size type', "bottoms size women's", 'material'])
    # One Word2Vec embedding model per attribute key.
    models: Dict[str, Word2Vec] = Embedding.extract_keys_vocab(d, 1, key_list)

    print("Building tree...")
    # Working from here
    head = -1  # Get the whole data.
    birch_tree = BirchTree(d, 1, models, head=head)
    tree = birch_tree.build_tree(verbose=False)

    # birch_tree.save_birch_tree_to_binary(TREE_FOLDER_PATH)