def main():
    """Parse CLI arguments, configure logging, and extract features from
    every regular file in the target directory.

    Command line:
        -p / --path   (required) folder containing the binaries to process
        -d / --debug  enable verbose logging of extracted information

    Returns:
        dict mapping file name -> extracted features.
    """
    argument_parser = argparse.ArgumentParser()

    argument_parser.add_argument(
        '-p',
        '--path',
        required=True,
        help='Path to folder containing binaries')

    argument_parser.add_argument(
        '-d',
        '--debug',
        action='store_true',
        help='Print extracted information')

    arguments = argument_parser.parse_args()

    # check_path() validates the directory; exact normalization unknown —
    # os.path.join below is safe with or without a trailing separator.
    path = check_path(arguments.path)

    Extractor.setup_logging(arguments.debug)

    file_features = {}

    for file_name in os.listdir(path):
        # BUG FIX: plain `path + file_name` silently produced a wrong path
        # whenever check_path() returned no trailing slash.
        abs_file_name = os.path.join(path, file_name)
        if os.path.isfile(abs_file_name):
            # BUG FIX: the return value was discarded and file_features
            # was never populated; keep the features per file name.
            file_features[file_name] = get_file_features(abs_file_name)

    return file_features
# ---- Example 2 ----
def main(argv):
    """Demo: run the triple extractor over one fixed simplified-Chinese sentence."""
    # Original web-fetch pipeline, kept for reference:
    # url = 'https://www.dbs.com/hongkong-zh/about-us/our-management/piyush-gupta/default.page'
    # g = Goose({'stopwords_class': StopWordsChinese})
    # article = g.extract(url=url)
    # data = HanziConv.toSimplified(u''.join(article.cleaned_text[:]))
    # data = data.replace('\n', ' ').replace('\r', '')
    data = '汤姆是林肯的哥哥'
    print(data)

    ext = Extractor()
    ext.load()
    ext.chunk_str(data)
    ext.resolve_all_conference()
    print("Triple: ")
    rendered = '\n'.join(str(triple) for triple in ext.triple_list)
    print(rendered)

    ext.release()
def process_image(image: str):
    """Detect faces in *image*, classify each face's emotion, draw labeled
    boxes on the original, and save it as '<name>_labeled.jpg' in TARGET_DIR.

    :param image: path to image
    :return:
    """
    # the face detector writes per-face bounding boxes and crop paths to a CSV
    coords_path = Extractor.extract_faces(image, 1, )
    df = pd.read_csv(coords_path)
    df['emotion'] = None

    # classify every cropped face (48x48 grayscale, single-sample batch)
    for idx, location in enumerate(df.image_location.tolist()):
        face = cv.imread(location,  cv.IMREAD_GRAYSCALE)
        batch = face.reshape((1, 48, 48, 1))
        df.at[idx, "emotion"] = MAPPING[MODEL.predict_classes([batch])[0]]

    annotated = cv.imread(image)
    height = annotated.shape[0]  # scale line widths / font to image size

    # draw one rectangle plus its emotion label per detected face
    for i in range(len(df)):
        row = df.loc[i]
        cv.rectangle(annotated,
                     (row['x_lo'], row['y_lo']),
                     (row['x_hi'], row['y_hi']),
                     (0, 255, 0),
                     height // 1000)
        cv.putText(annotated,
                   row['emotion'],
                   (row['x_lo'] + 5, row['y_hi'] - 5),
                   cv.FONT_HERSHEY_PLAIN,
                   height / 1000,
                   (0, 255, 0),
                   height // 400)

    out_path = TARGET_DIR + image.split("/")[-1].split('.')[0] + '_labeled.jpg'
    plt.imsave(out_path, convert_to_rgb(annotated))
# ---- Example 4 ----
    def _extract_feature(self):
        """Compute test-set features for both modalities.

        Puts both networks in eval mode, runs the extractor over the photo
        and sketch test sets with gradients disabled, and returns the
        (name, feature) pairs for each modality.
        """
        with t.no_grad():
            self.photo_net.eval()
            self.sketch_net.eval()

            ext = Extractor(e_model=self.photo_net,
                            vis=False,
                            dataloader=True)
            photo = ext.extract(self.photo_test)

            # reuse the same extractor for the sketch branch
            ext.reload_model(self.sketch_net)
            sketch = ext.extract(self.sketch_test)

        return (photo['name'], photo['feature'],
                sketch['name'], sketch['feature'])
# ---- Example 5 ----
    def _extract_feature_embedding(self):
        """Compute embedding features for both modalities.

        Same flow as the plain feature extraction, but with category info
        disabled and an explicit test batch size.
        """
        with t.no_grad():
            self.photo_net.eval()
            self.sketch_net.eval()

            ext = Extractor(e_model=self.photo_net,
                            cat_info=False,
                            vis=False,
                            dataloader=True)
            photo = ext.extract(self.photo_test,
                                batch_size=self.test_bs)

            # reuse the same extractor for the sketch branch
            ext.reload_model(self.sketch_net)
            sketch = ext.extract(self.sketch_test,
                                 batch_size=self.test_bs)

        return (photo['name'], photo['feature'],
                sketch['name'], sketch['feature'])
# ---- Example 6 ----
kwargs.logscale = stob(kwargs.logscale, "logscale")

# cast the integer-valued string arguments in place
for _attr in ('ncpu', 'total', 'random_seed', 'nseg',
              'sr', 'winsize', 'nfft'):
    setattr(kwargs, _attr, int(getattr(kwargs, _attr)))

# cast the float-valued string arguments in place
for _attr in ('val_size', 'test_size', 'start_pos',
              'interval', 'overlap', 'eps'):
    setattr(kwargs, _attr, float(getattr(kwargs, _attr)))

if __name__ == "__main__":
    extractor = Extractor(**kwargs)

    # Pipeline stages, run in order when their flag combination is set:
    # 1) preprocess video/audio files into npz format
    if kwargs.run_vid or kwargs.run_aud or kwargs.remove_unpaired:
        extractor.run(**kwargs)
    # 2) remove redundant npz files
    if kwargs.remove_failure or kwargs.remove_unpaired_npz:
        extractor.remove_redundant(**kwargs)
    # 3) split data into train / validation / test folders
    if kwargs.make_train_val_test_split:
        extractor.train_val_test_split(**kwargs)
# ---- Example 7 ----
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 14 Jun 2019 11:27
hash an image file or list of image files
Tu Bui [email protected]
"""

import os
import numpy as np
import pandas as pd
from utils.extractor import Extractor
from utils.database import H5pyData

if __name__ == '__main__':
    hasher = Extractor()

    # --- hash a single image and report its near-duplicate threshold ---
    img_path = 'cat_neardup.png'
    des, th = hasher.extract2(img_path)
    print('hash: {}\nNear-duplication threshold: {}'.format(des, th))

    # --- hash a batch of images and build a hash database ---
    img_lst = [
        'samples/cat.jpg', 'samples/airplane1.jpg', 'samples/airplane2.jpg'
    ]
    hashes, ths = hasher.extract_batch(img_lst)
    # numpy database on disk
    np.savez('hash_database.npz', feats=hashes, ths=ths)
    # hdf5 alternative — efficient for large databases and incremental saving
    data = H5pyData('hash_database.h5', 'w')
    data.append(hashes, ths)
    # also save the image paths for geometry matching
# ---- Example 8 ----
def process(url_handler, firefox, csv_handler, process_additional_pages=False):
    """Scrape Just Dial listing pages and write one CSV row per card.

    :param url_handler: iterable of listing-page URLs to scrape
    :param firefox: browser wrapper exposing fetch() and scroll_to_bottom()
    :param csv_handler: output writer exposing write_row()
    :param process_additional_pages: when True and a listing has more than
        10 pages, queue every 10th page for later scraping
    """

    for url in url_handler:
        # 1-based counter of entries written for the current URL
        sr_no = 1
        url = str(url)
        try:
            print("Surfing Over Just Dial ......!")
            print("LOADING : ", url)
            firefox.fetch(url)
            print(
                "Scrolling to bottom just to make sure, all contents are loaded!"
            )
            # number of result pages reported after the endless scroll
            pagination_count = firefox.scroll_to_bottom()
            """
            Since the url is input from user, we have to be ready to handle ambiguity in urls.
                1. "..../Fabric-Retailers/nct-10890504"
                2. "..../Fabric-Retailers/nct-10890504/"
                3. "..../Fabric-Retailers/nct-10890504/page-1"
                4. "..../Fabric-Retailers/nct-10890504/page-1/"
                5. "..../Fabric-Retailers/nct-10890504/page-"
                6. "..../Fabric-Retailers/nct-10890504/page"
                ...
                ...
                for same input of first page.
                so we 
            """
            # NOTE(review): _formatted_url is computed but never used below;
            # the URL normalization described above appears unfinished.
            _formatted_url = url.rstrip('/')
            if pagination_count > 10 and process_additional_pages:
                """
                The Service provider supports only 10 pages as endless scrolling, 
                Hence, we need to scrape every 10th page! 

                if we are scraping the first page, and we have more than 10 pages, better we add 
                11th page, 21st page, 31st page ... etc to the urls, list! 
                """
                base_url = url.split('/page')[0] if '/page' in url else url
                print("adding further pages to list:", pagination_count)
                # NOTE(review): additional_urls is not defined in this function —
                # presumably a module-level list shared with the caller; verify.
                for tenth_digit in range(2, math.floor(pagination_count / 10)):
                    additional_urls.append(
                        f'{base_url}/page-{tenth_digit + 1}')
                    print(f'Added: {base_url}/page-{tenth_digit + 1}')
            # Extractor splits the loaded page into one soup per listing card
            cards = Extractor(firefox)
            # firefox.driver.minimize_window()
            for soup in cards.soup_list:
                # Parser pulls the named fields out of a single card's soup
                soup = Parser(soup)
                csv_handler.write_row(
                    '\n{name},  {summery}, {address}, {phone_num}, {verification}, {link}'
                    .format(
                        **{
                            'name': clean(soup.name),
                            'summery': clean(soup.summery),
                            'address': clean(soup.address),
                            'phone_num': clean(soup.contact),
                            'verification': clean(soup.verification),
                            'link': clean(soup.link),
                        }))
                print(f"Entry Item #{sr_no} : \t", soup.name)
                sr_no = sr_no + 1
            # polite delay between listing pages
            time.sleep(6)
            print("=" * 60)
            print(f"Loaded {sr_no - 1} items from :: {url}")
            print("=" * 40)
        except Exception as e:
            # best-effort scraping: log the failure and move on to the next URL
            time.sleep(6)
            print("=" * 60)
            print('Error detected : ' + str(e.__class__.__name__) + '\n')
            print(e)
            print(traceback.format_exc())
            print("=" * 40)
# ---- Example 9 ----
# Trained ResNet photo model (mixed triplet loss)
PHOTO_RESNET = '/data1/zzl/model/caffe2torch/mixed_triplet_loss/photo/photo_resnet_85.pth'

# The trained model root for vgg
SKETCH_VGG = '/data1/zzl/model/caffe2torch/vgg_triplet_loss/sketch/sketch_vgg_190.pth'
PHOTO_VGG = '/data1/zzl/model/caffe2torch/vgg_triplet_loss/photo/photo_vgg_190.pth'

FINE_TUNE_RESNET = '/data1/zzl/model/caffe2torch/fine_tune/model_270.pth'

device = 'cuda:1'
'''vgg'''
# Build VGG16 with a 125-way classifier head, load the photo weights on CPU,
# then move the model to GPU.
vgg = vgg16(pretrained=False)
vgg.classifier[6] = nn.Linear(in_features=4096, out_features=125, bias=True)
vgg.load_state_dict(t.load(PHOTO_VGG, map_location=t.device('cpu')))
# NOTE(review): `device` is set to 'cuda:1' above but .cuda() moves to the
# default GPU — confirm which device is intended.
vgg.cuda()

ext = Extractor(pretrained=False)
ext.reload_model(vgg)

# Extract photo features and cache them to a pickle
photo_feature = ext.extract_with_dataloader(test_photo_root,
                                            'photo-vgg-190epoch.pkl')

# Swap in the sketch weights and reuse the same extractor
vgg.load_state_dict(t.load(SKETCH_VGG, map_location=t.device('cpu')))
ext.reload_model(vgg)

sketch_feature = ext.extract_with_dataloader(test_set_root,
                                             'sketch-vgg-190epoch.pkl')
'''resnet'''
# Same procedure for ResNet-50 with a 125-way fully-connected head
resnet = resnet50()
resnet.fc = nn.Linear(in_features=2048, out_features=125)
resnet.load_state_dict(t.load(PHOTO_RESNET, map_location=t.device('cpu')))
resnet.cuda()
# ---- Example 10 ----
            duplicate_decision = False
        inliners, total = (inliners1,
                           total1) if inliners1 >= inliners2 else (inliners2,
                                                                   total2)
        msg += '\n%d out of %d keypoints matched.' % (inliners, total)
    if verbose:
        print(msg)
    return duplicate_decision, candidate_id, candidate_path


if __name__ == '__main__':
    args = parser.parse_args()

    print('Loading hash database ...')
    db = get_database_reader(args.hash_database)
    ths = db.get_thresholds()

    print('Loading search index ...')
    with open(args.search_index, 'rb') as f:
        search = pickle.load(f)

    # the extractor is stateless across queries and can be reused
    extract = Extractor()
    # paths of the database images: one path per row, no header
    img_lst = pd.read_csv(args.image_list, header=None)[0].tolist()

    dup_decision, nearest_img_id, nearest_img_path = neardup_detect(
        args.input, extract, img_lst, ths, search, args.verbose)
    print('Closest image id: #%d, path: %s' %
          (nearest_img_id, nearest_img_path))
    print('Final decision: Duplication detect? {}'.format(dup_decision))
def get_extractor():
    """Return a fresh Extractor instance."""
    return Extractor()