Example #1
    def make_npy(self):
        base = self.knn_num
        samples, labels = [], []
        paths = hp.get_paths(base)
        for path in paths:
            img = hp.get_image(base, path)
            sample = self.__buddy_hog(img)
            label = hp.get_name(path)
            labels.append(label)
            samples.append(sample)

        samples = np.float32(samples)
        labels = np.array(labels)
        np.save(self.samples_num_file, samples)
        np.save(self.labels_num_file, labels)

        base = self.knn_sym
        samples, labels = [], []
        paths = hp.get_paths(base)
        for path in paths:
            img = hp.get_image(base, path)
            sample = self.__buddy_hog(img)
            label = hp.get_name(path) - 10  # symbol names start at 10; shift labels to 0-based
            labels.append(label)
            samples.append(sample)

        samples = np.float32(samples)
        labels = np.array(labels)
        np.save(self.samples_sym_file, samples)
        np.save(self.labels_sym_file, labels)
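Every snippet on this page leans on a small helper module imported as `hp` that wraps directory listing and image loading. Its implementation is never shown here; a minimal sketch of what the calls above appear to assume (the label-in-filename convention in `get_name` is a guess) could look like this:

import os
import cv2

def get_paths(base):
    # All image file names inside the directory `base`.
    return sorted(os.listdir(base))

def get_image(base, path):
    # Load one image from the directory.
    return cv2.imread(os.path.join(base, path))

def get_gray_image(base, path):
    # Load one image as single-channel grayscale.
    return cv2.imread(os.path.join(base, path), cv2.IMREAD_GRAYSCALE)

def get_name(path):
    # Hypothetical: integer class label encoded at the start of the
    # file name, e.g. "12_0005.png" -> 12.
    return int(path.split("_")[0])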
Example #2
    def make_npy(self):
        base = self.svm_num
        samples, labels = [], []
        paths = hp.get_paths(base)
        for path in paths:
            img = hp.get_image(base, path)
            sample = self.__buddy_hog(img)
            label = hp.get_name(path)
            labels.append(label)
            samples.append(sample)

        samples = np.float32(samples)
        labels = np.array(labels)
        np.save(self.samples_num_file, samples)
        np.save(self.labels_num_file, labels)

        base = self.svm_sym
        samples, labels = [], []
        paths = hp.get_paths(base)
        for path in paths:
            img = hp.get_image(base, path)
            sample = self.__buddy_hog(img)
            label = hp.get_name(path) - 10
            labels.append(label)
            samples.append(sample)

        samples = np.float32(samples)
        labels = np.array(labels)
        np.save(self.samples_sym_file, samples)
        np.save(self.labels_sym_file, labels)
Example #3
def load(dir_num, dir_sym):
    # Fills the module-level lists num_images/num_labels and sym_images/sym_labels.
    print('\n##############')
    print('loading nums...')
    num_paths = hp.get_paths(dir_num)
    for path in num_paths:
        num_images.append(hp.get_gray_image(dir_num, path))
        num_labels.append(hp.get_test(path, "num")[0])
    print('loading syms...')
    sym_paths = hp.get_paths(dir_sym)
    for path in sym_paths:
        sym_images.append(hp.get_gray_image(dir_sym, path))
        sym_labels.append(hp.get_test(path, "sym")[0])
    print('##############')
Example #4
def make_hog_num_file():
    base = hp.small_test_num_images_15x20
    samples, labels = [], []
    paths = hp.get_paths(base)
    for path in paths:
        img = hp.get_image(base, path)
        sample = buddy_hog(img)
        label = hp.get_name(path)
        labels.append(label)
        samples.append(sample)

    samples = np.float32(samples)

    # header line: <sample count> <input length> <output length>
    document = ""
    head = "{0} {1} {2}\n".format(len(samples), len(samples[0]), 10)
    document += head
    for sample, label in zip(samples, labels):
        input_vector = ' '.join(map(str, list(sample)))
        output = make_output_text_vector(int(label), 10)
        document += input_vector + "\n" + output
    print(document)
    with open(os.path.join("ann/test/", "test-hog-nums.dat"),
              'w') as temp_file:
        temp_file.write(document)
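Examples #4 and #7 write a plain-text training file whose header line is "<pairs> <inputs> <outputs>" followed by alternating input and output lines, the layout used by FANN-style ANN trainers. The `make_output_text_vector` helper they call is not shown; a plausible sketch, assuming it emits a one-hot vector over `size` classes:

def make_output_text_vector(label, size):
    # Hypothetical helper: one-hot text line such as "0 0 1 ... 0\n"
    # with the 1 at position `label`.
    bits = ["1" if i == label else "0" for i in range(size)]
    return ' '.join(bits) + "\n"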
Example #5
    def __make_knn_from_images(self, mode):
        base = ""
        modifier = 0
        samples, labels = [], []
        if mode == "num":
            base = self.knn_num
            modifier = 0
        elif mode == "sym":
            base = self.knn_sym
            modifier = 10  # symbol names start at 10; shift labels to 0-based
        paths = hp.get_paths(base)

        for path in paths:
            img = hp.get_image(base, path)
            sample = self.__buddy_hog(img)
            label = hp.get_name(path) - modifier
            labels.append(label)
            samples.append(sample)

        samples = np.float32(samples)
        labels = np.array(labels)

        model = KNearest(k=5)
        model.train(samples, labels)

        return model
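The `KNearest` class is not defined in this snippet; the call pattern matches the thin wrapper from OpenCV's digits.py sample. A sketch of such a wrapper, assuming the cv2.ml API of OpenCV 3+:

import cv2
import numpy as np

class KNearest(object):
    # Minimal k-NN wrapper in the style of OpenCV's digits.py sample.
    def __init__(self, k=3):
        self.k = k
        self.model = cv2.ml.KNearest_create()

    def train(self, samples, responses):
        # samples: float32 array (n, features); responses: one label per row
        self.model.train(samples, cv2.ml.ROW_SAMPLE, np.int32(responses))

    def predict(self, samples):
        _ret, results, _neigh, _dist = self.model.findNearest(samples, self.k)
        return results.ravel()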
Example #6
    def __make_svm_from_images(self, dir_train, mode):
        samples, labels = [], []
        paths = hp.get_paths(dir_train)
        modifier = 0
        if mode == "num":
            modifier = 0
        elif mode == "sym":
            modifier = 10
        for path in paths:
            img = hp.get_image(dir_train, path)
            sample = self.__buddy_hog(img)
            label = hp.get_name(path) - modifier
            labels.append(label)
            samples.append(sample)

        samples = np.array(samples)
        labels = np.array(labels)

        # deterministic shuffle so training runs are reproducible
        rand = np.random.RandomState(321)
        shuffle = rand.permutation(len(samples))
        samples, labels = samples[shuffle], labels[shuffle]

        samples = np.float32(samples)

        model = SVM(C=2.67, gamma=5.383)
        model.train(samples, labels)
        return model
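The hyper-parameters C=2.67 and gamma=5.383 are the ones used in OpenCV's digits.py sample, which suggests `SVM` is the same kind of thin wrapper over cv2.ml with an RBF kernel. A sketch under that assumption:

import cv2
import numpy as np

class SVM(object):
    # Minimal RBF-kernel SVM wrapper in the style of OpenCV's digits.py sample.
    def __init__(self, C=1.0, gamma=0.5):
        self.model = cv2.ml.SVM_create()
        self.model.setType(cv2.ml.SVM_C_SVC)
        self.model.setKernel(cv2.ml.SVM_RBF)
        self.model.setC(C)
        self.model.setGamma(gamma)

    def train(self, samples, responses):
        self.model.train(samples, cv2.ml.ROW_SAMPLE, np.int32(responses))

    def predict(self, samples):
        # predict() returns (retval, results); results is an (n, 1) float32 array
        return self.model.predict(samples)[1].ravel()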
Example #7
def make_hog_num_file():
    base = hp.small_test_num_images_15x20
    samples, labels = [], []
    paths = hp.get_paths(base)
    for path in paths:
        img = hp.get_image(base, path)
        sample = buddy_hog(img)
        label = hp.get_name(path)
        labels.append(label)
        samples.append(sample)

    samples = np.float32(samples)

    # header line: <sample count> <input length> <output length>
    document = ""
    head = "{0} {1} {2}\n".format(len(samples), len(samples[0]), 10)
    document += head
    for sample, label in zip(samples, labels):
        input_vector = ' '.join(map(str, list(sample)))
        output = make_output_text_vector(int(label), 10)
        document += input_vector + "\n" + output
    print(document)
    with open(os.path.join("ann/test/", "test-hog-nums.dat"), 'w') as temp_file:
        temp_file.write(document)
Example #8
    def run(self):
        while True:
            if not self.__queue.empty():
                print('process dir')
                dir_in = self.__queue.get()
                paths = hp.get_paths(dir_in)
                for num, path in enumerate(paths, start=1):
                    meta = {'id': random.randint(1000, 1000000000),
                            'plate': hp.get_image(dir_in, path)}
                    self.plate_captured.emit(meta)
                self.__queue.task_done()
Example #9
def process_directory(directory, pmanager, file_types, path_ignore):
    if debug:
        print('process_directory: ' + directory)
    file_paths, directory_paths = get_paths(directory)
    i = 0
    for file_path in file_paths:
        fname, extension = os.path.splitext(file_path)
        extension = extension.replace('.', '')
        if extension in file_types:
            if '.' not in fname:
                i += 1
                sync_local_photo(file_path, pmanager, path_ignore)
            else:
                print(file_path + ' skipped due to . in name')
    print("Syncing %i photos in local db" % i)
Example #10
def global_testing(input_dir, set_name, mode, ann, knn, msp, svm):
    print "Start recognition:"
    print "run test", (set_name, mode)

    abc = {3: 0, 2: 0, 1: 0}
    problem = 0
    paths = hp.get_paths(input_dir)
    total, err = len(paths), len(paths)  # err is decremented on every correct majority vote
    for path in paths:
        # recognition
        img = hp.get_image(input_dir, path)

        at, ann_i = ann.rec(img, mode)
        kt, knn_i = knn.rec(img, mode)
        mt, msp_i = msp.rec(img, mode)
        st, svm_i = svm.rec(img, mode)
        # test
        tlit, ti = hp.get_test(path, mode)

        up_count_dict(tlit)
        up_dict(ann_dict, at, tlit)
        up_dict(knn_dict, kt, tlit)
        up_dict(msp_dict, mt, tlit)
        up_dict(svm_dict, st, tlit)

        # majority vote among the ann/knn/svm answers (msp is tallied but not voted)
        rec = None
        xyz = {at, kt, st}
        if len(xyz) < 3:
            t = [at, kt, st]
            c = Counter(t)
            rec = max(c, key=c.get)
            if tlit == rec:
                up_dict(common_dict, rec, tlit)
                err -= 1
            else:
                rec = None

        if rec is None:
            abc[len(xyz)] += 1
            if len(xyz) == 1:
                cv2.imwrite("try/awesome/" + str([at, kt, st]) + "__" + str(tlit) + "__" + path, img)

    hp.print_result(total, err, set_name, mode)

    print "awesome:", abc[1]
    print "great:", abc[2]
    print "f**k this shit: ", abc[3]
    print "                 "
Example #11
def testing_svm_from_image_base(input_dir, set_name, mode, svm):
    success, error = "", ""
    counter, err = 1, 0
    paths = hp.get_paths(input_dir)

    for path in paths:
        # recognition
        img = hp.get_image(input_dir, path)
        lit, i = svm.rec(img, mode)

        # test
        tlit, ti = hp.get_test(path, mode)
        if tlit != lit:
            err += 1
            error += "{0}\n".format(path)
            hp.write_image("try/error", path, img)
        else:
            success += "{0}\n".format(path)
        counter += 1

    hp.print_result(counter, err, set_name, mode)
Example #12
def testing_from_image_base(input_dir, set_name, mode, ann):
    # init block
    success, error = "", ""
    counter, err = 1, 0
    paths = hp.get_paths(input_dir)
    for path in paths:
        # recognition
        img = hp.get_image(input_dir, path)
        lit, i = ann.rec(img, mode)
        # test
        tlit, ti = hp.get_test(path, mode)

        if tlit != lit:
            err += 1
            error += "{0}\n".format(path)
            hp.write_image("try/error", path, img)
        else:
            success += "{0}\n".format(path)
        counter += 1

    hp.print_result(counter, err, set_name, mode)
Example #13
def visualize_text_clustering_result(km_model):
    '''
    Save images to different folders according to the kmeans clustering labels.

    INPUT:
        km_model: trained KMeans model
    OUTPUT:
        None
    '''
    text_clustering_result = collections.defaultdict(list)
    for i, label in enumerate(km_model.labels_):
        text_clustering_result[label].append(int(X.index[i]))

    paths = get_paths(category)
    for i in text_clustering_result:
        for j in text_clustering_result[i]:
            product_id = sofa_df.ix[j].product_id  # note: .ix is gone in modern pandas; use .loc/.iloc
            for path in paths:
                if product_id in path:
                    image = skimage.io.imread('wayfair/images/' + category + '/' + path)
                    new_path = 'wayfair/images/' + category + '/text_clustering/' + str(i) + '/' + path
                    skimage.io.imsave(new_path, image)


if __name__ == '__main__':
    categories = ['sofa', 'coffee_table', 'office', 'dining', 'bookcase', 'nightstand', 'bed', 'dresser']
    for category in categories:
        client = MongoClient()
        db = client['furniture']
Example #14
def build_all_vec_info_json(category):
    """
    For each image within the category, calculate the dominant color and the PCAed vector.
    For each item within the category, use tfidf to vectorize the description.
    Merge the info above, together with product info (title, price, URL etc.) to one dataframe,
        then save it to a json file.

    INPUT:
        category: string
    OUTPUT:
        None
    """

    paths = get_paths(category)
    print(len(paths))

    # Get dominant color:
    start_time = time.time()
    domi_color_dict = get_domi_color(paths, category)
    time_1 = time.time()
    print "Time to run get_domi_color (s): ", time_1 - start_time

    # Get PCAed features and pickle transformers:
    feature_dict_pca, pca_scaler, pca_model = image_featurizer(category, pca=True)

    path = "wayfair/pickle/" + category + "_pca_scaler.pkl"
    with open(path, "w") as f:
        pickle.dump(pca_scaler, f)
    path = "wayfair/pickle/" + category + "_pca_model.pkl"
    with open(path, "w") as f:
        pickle.dump(pca_model, f)

    # Building dataframe from dictionaries:
    domi_pca_dict = defaultdict(dict)
    for i in domi_color_dict:
        domi_pca_dict[i]["domi"] = domi_color_dict[i]
    for i in feature_dict_pca:
        domi_pca_dict[i]["pca"] = feature_dict_pca[i]

    domi_pca_df = pd.DataFrame(domi_pca_dict).T
    domi_pca_df = domi_pca_df.reset_index()
    domi_pca_df.rename(columns={"index": "path"}, inplace=True)
    domi_pca_df["product_id"] = domi_pca_df["path"].apply(lambda x: x.split(".")[0].split("_")[-2])

    client = MongoClient()
    db = client["furniture"]
    collection = db[category]
    products_df = pd.DataFrame(list(collection.find()))

    products_df_small = products_df[
        ["product_id", "title", "price", "url", "description_all", "rating_avg", "rating_count"]
    ]

    all_info_df = pd.merge(domi_pca_df, products_df_small, how="inner", left_on="product_id", right_on="product_id")

    # Train a tfidf vectorizer & pickle it:
    X = all_info_df["description_all"]
    tfidf = TfidfVectorizer(
        strip_accents="unicode", stop_words="english", max_df=0.8, max_features=1000, ngram_range=(1, 2)
    )
    tfidf_matrix = tfidf.fit_transform(X).todense()

    path = "wayfair/pickle/" + category + "_tfidf.pkl"
    with open(path, "w") as f:
        pickle.dump(tfidf, f)

    # Add tfidf vectors to the dataframe:
    tfidf_matrix = np.array(tfidf_matrix)

    tfidf_dict = defaultdict(dict)
    for i in range(tfidf_matrix.shape[0]):
        index = X.index[i]
        tfidf_dict[index]["tfidf_vec"] = tfidf_matrix[i, :]

    tfidf_df = pd.DataFrame(tfidf_dict).T
    tfidf_df = tfidf_df.reset_index()
    tfidf_df.rename(columns={"index": "ixx"}, inplace=True)

    all_info_df = pd.merge(all_info_df, tfidf_df, how="inner", left_index=True, right_on="ixx")
    all_info_df.drop("ixx", axis=1, inplace=True)

    # Some final cleaning & save to JSON:
    all_info_df = all_info_df[all_info_df["domi"].notnull()]
    all_info_df = all_info_df[all_info_df["pca"].notnull()]
    all_info_df = all_info_df[all_info_df["description_all"].notnull()]

    path = "wayfair/" + category + "_vec_info.json"
    all_info_df.to_json(path)
Example #15
    def __load_sym_patterns(self, input_dir):
        paths = hp.get_paths(input_dir)
        self.__patterns_sym = [
            hp.get_gray_image(input_dir, path) for path in paths
        ]
        self.__labels_sym = [hp.get_test(path, "sym")[0] for path in paths]
Example #16
    import helper

    args = parse_arguments()
    hot_reload = args.hot_reload
    write_info_file(args)

    if hot_reload:
        client = docker.from_env()
        cwd = os.getcwd()

        docker_env = args.docker_env
        docker_image = args.image_name
        env = None
        config = {}  # default so config.get(...) below is safe when no config file is found
        config_file = None

        target_folder, mdai_folder, config_path = helper.get_paths(args)
        # Detect the config file if it exists
        if os.path.exists(config_path):
            config_file = config_path

        # Prioritize config file values if it exists
        if config_file is not None:
            config = helper.process_config_file(config_file)

        placeholder_values = hot_reload_values

        helper.resolve_parent_image(placeholder_values, config,
                                    helper.PARENT_IMAGE_DICT)
        helper.add_env_variables(placeholder_values, config.get("env"))
        relative_mdai_folder = os.path.relpath(mdai_folder, target_folder)
        os.chdir(os.path.join(BASE_DIRECTORY, "mdai"))
Example #17
def clean(make_output, clean_data, orders_imported, verbose, run_pricing,
          ordernum, orderid, email, customerid, product, productid, lineitem,
          lineid, shipmentorder, shipmentid):
    '''
        param make_output <Boolean> output the data to different files, i.e. move from tsv to csv
        param clean_data <Boolean> clean the data and apply the proper fields we expect
        param orders_imported <Boolean> if the order record has been imported, flip to true so that
                                        we can create the linkage between order : record
    '''
    # field names for swapping down below
    ORDERC = 'Order__c'
    ACCOUNTC = 'AccountId__c'
    PRODUCTC = 'Product__c'
    LINEITEMC = 'OrderProduct__c'
    SHIPC = 'Shipment__c'

    # make logs
    logs = helper.Logger(verbose)

    # the actual good stuff
    dir_path = os.path.join(os.path.dirname(os.path.realpath(__file__)))
    logs.write("Current file directory :: %s" % dir_path)
    directories = [
        d for d in os.listdir(dir_path)
        if os.path.isdir(os.path.join(dir_path, d))
    ]
    print("Please choose a directory")
    for ix, dir in enumerate(directories):
        print(ix, dir)
    directoryindex = int(input(">> "))

    row = None
    if directoryindex < len(directories):
        # get the directory that we want to clean/run through
        clean_directory = directories[directoryindex]
        # retrieve root, dirs, and files from walk, retain only filenames
        _, _, filenames = next(
            os.walk(os.path.join(dir_path, clean_directory, 'tsv')),
            (None, None, []))

        # get the pricing files that we need
        if run_pricing:
            pricingfile = pricingmap.create_pricing(clean_directory)
            prices = pricingmap.return_pricing(pricingfile)
        else:
            pricingfile = os.path.join(pricingmap.make_dir(clean_directory),
                                       'order_product_pricing.csv')
            prices = pricingmap.return_pricing(pricingfile)

        import_folder = os.path.join(dir_path, 'imports')
        # generate the order map (commented out)
        # order_path = os.path.join(import_folder, 'imported_orders')
        # ordermap = helper.OrderMap(order_path, ordernum, orderid)
        # ordermapping = ordermap.get_order_map()
        # add a place to get customer mappings too
        customer_directory = os.path.join(import_folder, 'imported_customers')
        customermap = helper.CustomerMap(customer_directory, email, customerid)
        customermapping = customermap.get_customer_map()
        # and product mappings
        product_directory = os.path.join(import_folder, 'imported_products')
        productmap = helper.ProductMap(product_directory, product, productid)
        productmapping = productmap.get_product_map()
        # and finally order products
        lineitem_directory = os.path.join(import_folder,
                                          'imported_orderproducts')
        linemap = helper.LineItemMap(lineitem_directory, lineitem, lineid)
        lineitemmapping = linemap.get_lineitem_map()
        # and shipments too
        shipment_directory = os.path.join(import_folder, 'imported_shipments')
        shipmentmap = helper.ShipmentMap(shipment_directory, shipmentorder,
                                         shipmentid)
        shipmentmapping = shipmentmap.get_shipment_map()

        # for each file, let's clean out the tsvs and export to csv
        for filename in filenames:
            logs.write('Reading %s' % filename)
            if filename[0] == '.' or filename[0] == '_':
                logs.write('Skipping, invalid file %s' % filename)
            else:
                filetype = helper.chooser(filename)
                # directory to read from, read to, and clean to
                p_in, p_out, p_clean = helper.get_paths(
                    dir_path, clean_directory)
                # get headers
                order_type, header = helper.get_headers(
                    os.path.join(p_in, filename), logs)

                # need to handle this encoding because we have weird bytes
                with open(os.path.join(p_in, filename),
                          encoding='ISO-8859-1') as tsvfile:
                    reader = csv.DictReader(
                        tsvfile, delimiter='\t'
                    )  # use a DictReader to preserve header names
                    if make_output:
                        # backup the stuff from tsv to csv
                        out_filename = '%s%s.csv' % (filename.split('.')[0],
                                                     order_type)
                        with open(os.path.join(p_out, out_filename),
                                  'w') as csvfile:
                            csvwriter = csv.writer(csvfile,
                                                   delimiter=',',
                                                   quotechar='"',
                                                   quoting=csv.QUOTE_ALL)
                            try:
                                csvwriter.writerow(header)
                                for row in reader:
                                    csvwriter.writerow(v
                                                       for k, v in row.items())
                            except Exception as e:
                                logs.write(row) if row else logs.write(
                                    'Exception !!')
                                logs.write(e)
                                break
                if clean_data:
                    logs.write('cleaning %s' % filename)
                    # make a new list for objects that don't have a record
                    # missingorders = []
                    # missing_filename = '%s%s-Clean.csv' % (filename.split('.')[0], '_MissingOrderLink')
                    with open(os.path.join(p_in, filename),
                              encoding='ISO-8859-1') as tsvfile:
                        clean_reader = csv.DictReader(
                            tsvfile,
                            delimiter='\t')  # and one for the clean file
                        if make_output:
                            # and create a new clean file for this
                            clean_filename = '%s%s-Clean.csv' % (
                                filename.split('.')[0], order_type)
                            with open(os.path.join(p_clean, clean_filename),
                                      'w') as cleanfile:
                                cleanwriter = csv.writer(cleanfile,
                                                         delimiter=',',
                                                         quotechar='"',
                                                         quoting=csv.QUOTE_ALL)
                                try:
                                    if filetype == 1 or filetype == 2:
                                        header.append('Product_External_ID__c')
                                        header.append('Order_External_ID__c')
                                    elif filetype == 4:
                                        header.append('Custom_Field_1__c')
                                        header.append('Order_External_Id__c')
                                    elif filetype == 5:
                                        header.append(
                                            'TLA_Shipment_Provider_Carrier__c')
                                        header.append('Order_External_ID__c')
                                        header.append('Order_Site_Id__c')

                                    cleanwriter.writerow(header)

                                    for crow in clean_reader:
                                        if filetype == 3:
                                            helper.order_cleaner(
                                                crow, prices, ordernum)
                                            helper.map__c(
                                                crow, customermapping,
                                                ACCOUNTC)
                                        if orders_imported and filetype != 3:
                                            if filetype == 1:  # order product
                                                helper.order_product_cleaner(
                                                    crow)
                                                helper.map__c(
                                                    crow, productmapping,
                                                    PRODUCTC)
                                            elif filetype == 2:  # shipment product
                                                helper.shipment_product_cleaner(
                                                    crow)
                                                helper.map__c(
                                                    crow, lineitemmapping,
                                                    LINEITEMC)
                                                helper.map__c(
                                                    crow, shipmentmapping,
                                                    SHIPC)
                                            elif filetype == 4:  # payment
                                                helper.payment_cleaner(crow)
                                            elif filetype == 5:  # shipment
                                                helper.shipment_cleaner(crow)
                                        cleanwriter.writerow(
                                            v for k, v in crow.items())
                                except Exception as e:
                                    logs.write(crow) if crow else logs.write(
                                        'Exception !!')
                                    logs.write(e)
                                    raise
Example #18
    def __load_sym_patterns(self, input_dir):
        paths = hp.get_paths(input_dir)
        self.__patterns_sym = [hp.get_gray_image(input_dir, path) for path in paths]
        self.__labels_sym = [hp.get_test(path, "sym")[0] for path in paths]