Example #2
def predict(service, chart, image_filenames):
    # setting up DD client
    host = 'localhost'
    sname = config['REPO'][service]['NAME']
    dd = DD(host)
    dd.set_return_format(dd.RETURN_PYTHON)

    parameters_input = {}
    parameters_mllib = {}
    parameters_output = {
        "best": 10,
        "template":
            "{{#body}}{{#predictions}} "
            "{ \"index\": {\"_index\": \"objects-10\", \"_type\": \"img\" } }\n "
            "{ \"uri\": \"{{uri}}\", "
            "\"chart\": \"" + chart + "\", "
            # "\"artist\": \"" + artist + "\", "
            "\"categories\": [ {{#classes}} "
            "{ \"category\": \"{{cat}}\", "
            "\"score\":{{prob}} } "
            "{{^last}},{{/last}}{{/classes}} ] }\n "
            "{{/predictions}}{{/body}} \n",
        "network": {
            "url": "host.docker.internal:9200/objects-10/_bulk",
            "http_method": "POST"
        }
    }

    result = dd.post_predict(sname, image_filenames, parameters_input,
                             parameters_mllib, parameters_output)
    # return the server response instead of discarding it (also avoids
    # shadowing the function name with a local variable)
    return result
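
A hedged usage sketch (not part of the original source): assuming config.json has an 'objects' entry under REPO and the image paths are readable by the DeepDetect server, the helper above might be called as follows. All names below are placeholders.

res = predict('objects', 'spring-charts', ['/data/imgs/001.jpg', '/data/imgs/002.jpg'])
print(res['status'])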
Example #3
 def __init__(self,
              dnnmodel,
              image_files,
              index_repo,
              batch_size=32,
              dd_host='localhost',
              dd_port=8080,
              dd_description='image classification',
              meta_in='',
              meta_out='',
              captions_in='',
              captions_out='',
              mapi_in='',
              mapi_out=''):
     self.dd_host = dd_host
     self.dd_port = dd_port
     self.dd_description = dd_description
     self.dd_mllib = 'caffe'
     self.meta_in = meta_in
     self.meta_out = meta_out
     self.captions_in = captions_in
     self.captions_out = captions_out
     self.mapi_in = mapi_in
     self.mapi_out = mapi_out
     self.gpuid = 0
     self.dnnmodel = dnnmodel
     if self.dnnmodel.extract_layer:
         self.dd_mltype = 'unsupervised'
     else:
         self.dd_mltype = 'supervised'
     self.image_files = image_files
     self.batch_size = batch_size
     self.binarized = False
     self.dd = DD(self.dd_host, self.dd_port)
     self.dd.set_return_format(self.dd.RETURN_PYTHON)
     self.index_repo = index_repo + '/' + self.dnnmodel.name
     try:
         os.mkdir(self.index_repo)
     except OSError:
         # logger.warning('directory ' + self.index_repo + ' may already exist')
         pass
     self.st = {}  # shelve used for full tags storage
     self.stm = {}  # in memory tmp storage
     if self.dd_mltype == 'supervised':
         self.st = shelve.open(self.index_repo + '/tags.bin')
     self.delete_dd_service()
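
The class around this constructor is not shown in the fragment. A hedged instantiation sketch, with hypothetical names (ImageIndexer and GoogleNetModel are placeholders, not defined above); the dnnmodel object must expose .name and .extract_layer as the constructor requires:

model = GoogleNetModel()   # hypothetical; provides .name and .extract_layer
indexer = ImageIndexer(model, ['/data/imgs/001.jpg'], '/data/index',
                       batch_size=64, dd_host='localhost', dd_port=8080)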
Example #5
def segment(image, nclasses=150, port=8080, host="localhost"):
    random.seed(134124)
    model_dir = '/home/model'
    sname = 'segserv'
    description = 'image segmentation'
    mllib = 'caffe'
    mltype = 'unsupervised'
    dd = DD(host, port)
    dd.set_return_format(dd.RETURN_PYTHON)

    def random_color():
        ''' generate rgb using a list comprehension '''
        r, g, b = [random.randint(0, 255) for i in range(3)]
        return [r, g, b]

    raw_img = plt.imread("/home/ubuntu/model/" + image).astype("float32") / 255
    height, width = raw_img.shape[:2]  # image arrays are (rows, cols) = (height, width)
    #width = 480
    #height = 480
    # creating ML service
    model_repo = model_dir
    if not model_repo:
        model_repo = os.getcwd() + '/model/'
    model = {'repository': model_repo}
    parameters_input = {'connector': 'image', 'width': width, 'height': height}
    parameters_mllib = {'nclasses': nclasses}
    parameters_output = {}
    try:
        servput = dd.put_service(sname, model, description, mllib,
                                 parameters_input, parameters_mllib,
                                 parameters_output, mltype)
    except Exception:  # most likely the service already exists
        pass

    # prediction call
    parameters_input = {'segmentation': True}
    parameters_mllib = {'gpu': True, 'gpuid': 0}
    parameters_output = {}
    data = ["/home/model/" + image]
    detect = dd.post_predict(sname, data, parameters_input, parameters_mllib,
                             parameters_output)

    pixels = np.array(list(map(int, detect['body']['predictions'][0]['vals'])))
    imgsize = detect['body']['predictions'][0]['imgsize']

    # visual output
    label_colours = []
    for c in range(nclasses):
        label_colours.append(random_color())
    label_colours = np.array(label_colours)

    r = pixels.copy()
    g = pixels.copy()
    b = pixels.copy()
    for l in range(0, nclasses):
        r[pixels == l] = label_colours[l, 0]
        g[pixels == l] = label_colours[l, 1]
        b[pixels == l] = label_colours[l, 2]

    r = np.reshape(r, (imgsize['height'], imgsize['width']))
    g = np.reshape(g, (imgsize['height'], imgsize['width']))
    b = np.reshape(b, (imgsize['height'], imgsize['width']))
    rgb = np.zeros((imgsize['height'], imgsize['width'], 3))
    rgb[:, :, 0] = r / 255.0
    rgb[:, :, 1] = g / 255.0
    rgb[:, :, 2] = b / 255.0
    print(rgb[0, 0])
    body_mask = np.where(rgb * 255 == np.array([47, 197, 233]), 1, 0)

    result = body_mask * raw_img
    plt.imsave("result.png", result)
    return result
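
A minimal usage sketch (hedged): it assumes the hard-coded model repository /home/model and the image directory /home/ubuntu/model/ from the function body exist on the machine running the script.

result = segment('street.jpg', nclasses=150, port=8080, host='localhost')
# the masked image is returned and also written to result.png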
Example #6
                                            service_dict["test_split"] = test_split
                                            service_dict["min_count"] = min_count
                                            service_dict["min_word_length"] = min_word_length
                                            service_dict["batch_size"] = batch_size
                                            service_dict["test_interval"] = test_interval
                                            services_list.append(service_dict)


#Create folders for all models
for service in services_list:
    directory = root_repository+service['service_name']
    if not os.path.exists(directory):
        os.makedirs(directory)

#Connect to DD
dd = DD(dede_server)
dd.set_return_format(dd.RETURN_PYTHON)

#Start the creation and training of services, pulling data every 10sec
service_count = 1
for service in services_list:
    #Get the start time so that duplicate runs of the same service do not overlap
    start_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    log_file.write("service number "+str(service_count)+" of "+str(len(services_list))+"\n")
    log_file.flush()
    service_count += 1
    #create the service
    service_name = service['service_name']
    log_file.write("Starting test for "+service_name+"\n")
    log_file.flush()
    if service['template'] == 'mlp':
Example #7
    imgquery = cv2.imread(imgfile)
    r = width / imgquery.shape[1]
    dim = (int(width), int(imgquery.shape[0] * r))
    small = cv2.resize(imgquery,dim)
    return small

host = 'localhost'
sname = 'imageserv'
description = 'image classification'
mllib = 'caffe'
mltype = 'supervised'
extract_layer = 'rois'
nclasses = args.nclasses
layer_size = 512 # auto anyways
width = height = 300
dd = DD(host)
dd.set_return_format(dd.RETURN_PYTHON)
ntrees = 1000
metric = 'angular'  # or 'euclidean'

# creating ML service
model_repo = os.getcwd() + '/' + args.model_dir
model = {'repository':model_repo,'templates':'../templates/caffe/'}
parameters_input = {'connector':'image','width':width,'height':height}
parameters_mllib = {'nclasses':nclasses}
parameters_output = {}
try:
    dd.put_service(sname,model,description,mllib,
                   parameters_input,parameters_mllib,parameters_output,mltype)
except Exception:  # most likely the service already exists
    pass
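
The snippet stops after service creation. A hedged follow-up prediction call reusing the service might look roughly like this (the image path is a placeholder):

data = ['/path/to/query.jpg']
detect = dd.post_predict(sname, data, {}, {}, {})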
Example #8
parser.add_argument('--max-batch-size',
                    help='max batch size to be tested',
                    type=int,
                    default=256)
parser.add_argument('--list-bench-files',
                    help='file holding the list of bench files',
                    default='list_bench_files.txt')
parser.add_argument('--npasses',
                    help='number of passes for every batch size',
                    type=int,
                    default=5)
args = parser.parse_args()

host = args.host
port = args.port
dd = DD(host, port)
dd.set_return_format(dd.RETURN_PYTHON)

list_bench_files = []
with open(args.list_bench_files) as f:
    for l in f:
        list_bench_files.append(args.remote_bench_data_dir + '/' + l.rstrip())
init_batch_size = 1
batch_sizes = []
l = init_batch_size
while l <= args.max_batch_size:
    batch_sizes.append(l)
    if l < 32:
        l = l * 2
    else:
        l += 16
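
A hedged sketch of how the computed batch sizes might drive the benchmark loop; --sname is not parsed in this fragment (the longer variant in Example #15 does), so args.sname is an assumption:

for bs in batch_sizes:
    data = list_bench_files[:bs]
    for _ in range(args.npasses):
        res = dd.post_predict(args.sname, data, {}, {}, {})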
Example #9
class InformationExtractor(object):
    """ Module with functions for information Extraction """
    wordnet_lemmatizer = WordNetLemmatizer()

    #External service URLs
    google_service_url = 'https://kgsearch.googleapis.com/v1/entities:search'
    probase_service_url = "https://concept.research.microsoft.com/api/Concept/ScoreByProb"
    #DD constants
    height = width = 224
    nclasses_clothing = 304
    nclasses_bags = 37
    nclasses_footwear = 51
    nclasses_fabric = 233

    #setting up DD client
    mllib = 'caffe'

    def __init__(self, word_vectors, companies, styles, materials, items,
                 probase_brands, probase_materials, patterns,
                 top_category_items, deep_detectStartup, confFilePath, tfidf):
        self.conf = json.load(open(confFilePath))
        self.tfidf = tfidf
        self.api_key = self.conf["google_api_key_path"]
        self.deep_detect_models = self.conf["deep_detect_models"]
        self.CAPTION_FACTOR = self.conf["caption_factor"]
        self.COMMENTS_FACTOR = self.conf["comments_factor"]
        self.USERTAG_FACTOR = self.conf["usertag_factor"]
        self.HASHTAG_FACTOR = self.conf["hashtag_factor"]
        if deep_detectStartup:
            self.dd = DD(self.conf["deep_detect_host"],
                         port=self.conf["deep_detect_port"])
            self.startup_deep_detect()
        self.wordvec_model = gensim.models.KeyedVectors.load_word2vec_format(
            word_vectors, binary=False)
        self.companies = companies
        self.styles = styles
        self.materials = materials
        self.items = items
        self.brands_keywords_google = []
        self.materials_keywords_google = []
        self.probase_brands = probase_brands
        self.probase_materials = probase_materials
        self.colors = []
        self.patterns = patterns
        self.top_category_items = top_category_items
        self.lemmatize()

    def lemmatize(self):
        """ Lemmatize domain lists"""
        self.styles_lemmas = {
            self.wordnet_lemmatizer.lemmatize(style): style
            for style in self.styles
        }
        self.materials_lemmas = {
            self.wordnet_lemmatizer.lemmatize(material): material
            for material in self.materials
        }
        self.items_lemmas = {
            self.wordnet_lemmatizer.lemmatize(item): item
            for item in self.items
        }

    def find_closest_semantic(self, caption, comments, tags, hashtags,
                              segmented_hashtags, num, topic, id):
        """ Finds num semantically closest candidates for a given topic"""
        topic = map(lambda x: x.decode('utf-8', 'ignore').encode("utf-8"),
                    topic)
        freq_scores = {}
        for x in topic:
            freq_scores[x] = 0.0
        # the same scoring loop, applied to each token source with its weight
        sources = [(caption, self.CAPTION_FACTOR),
                   (comments, self.COMMENTS_FACTOR),
                   (hashtags, self.HASHTAG_FACTOR),
                   (segmented_hashtags, self.HASHTAG_FACTOR),
                   (tags, self.USERTAG_FACTOR)]
        for tokens, factor in sources:
            for token in tokens:
                scores = []
                for x in topic:
                    token2 = x.lower()
                    token2Lemma = self.wordnet_lemmatizer.lemmatize(token2)
                    similarity = self.token_similarity(token, token2,
                                                       token2Lemma, factor,
                                                       self.tfidf[id])
                    scores.append((x, similarity))
                top = sorted(scores, reverse=True, key=lambda x: x[1])[:num]
                for x, score in top:
                    freq_scores[x] += score
        top = sorted([(k, v) for k, v in freq_scores.iteritems()],
                     reverse=True,
                     key=lambda x: x[1])[:num]
        return top

    def token_similarity(self, token, token2, token2Lemma, factor, tfidf):
        """ Returns similarity between two tokens using cosine similarity between embeddings, edit distance and TFIDF weighting"""
        similarity = 0.0
        if isinstance(token, str):
            token = token.decode("utf-8", "ignore")
        tokenLemma = self.wordnet_lemmatizer.lemmatize(token)
        if tokenLemma in self.wordvec_model.wv.vocab and token2Lemma in self.wordvec_model.wv.vocab:
            if edit_distance(tokenLemma, token2Lemma) == 0:
                factor = factor * 10
            similarity = factor * math.pow(
                float(self.wordvec_model.wv.similarity(tokenLemma,
                                                       token2Lemma)), 2)
        else:
            dist = factor * edit_distance(tokenLemma, token2Lemma)
            similarity = float(1) / float(1 + math.pow(dist, 2))
        tfidf_score = 0.0
        if token in tfidf:
            tfidf_score = tfidf[token]
        if token.encode("utf-8") in tfidf:
            tfidf_score = tfidf[token.encode("utf-8")]
        tfidf_score = max(tfidf_score, 0.0001)
        similarity = similarity * tfidf_score
        return similarity
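    # Worked illustration (hedged, not from the original source): with
    # factor = 1.0 and identical lemmas, edit_distance == 0 raises the factor
    # to 10, so the score becomes 10 * cosine_similarity ** 2; the result is
    # then scaled by the token's TF-IDF weight, floored at 0.0001.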

    def find_closest_syntactic(self, caption, comments, tags, hashtags,
                               segmented_hashtags, num, topic, id):
        """ Finds num semantically closest candidates for a given topic"""
        topic = map(lambda x: x.decode('utf-8', 'ignore').encode("utf-8"),
                    topic)
        freq_scores = {}
        for x in topic:
            freq_scores[x] = 0.0
        # the same scoring loop, applied to each token source with its weight
        sources = [(caption, self.CAPTION_FACTOR),
                   (comments, self.COMMENTS_FACTOR),
                   (hashtags, self.HASHTAG_FACTOR),
                   (segmented_hashtags, self.HASHTAG_FACTOR),
                   (tags, self.USERTAG_FACTOR)]
        for tokens, factor in sources:
            for token in tokens:
                scores = []
                for x in topic:
                    token2 = x.lower()
                    token2Lemma = self.wordnet_lemmatizer.lemmatize(token2)
                    similarity = self.token_similarity_syntactic_only(
                        token, token2, token2Lemma, factor, self.tfidf[id])
                    scores.append((x, similarity))
                top = sorted(scores, reverse=True, key=lambda x: x[1])[:num]
                for x, score in top:
                    freq_scores[x] += score
        top = sorted([(k, v) for k, v in freq_scores.iteritems()],
                     reverse=True,
                     key=lambda x: x[1])[:num]
        return top

    def token_similarity_syntactic_only(self, token, token2, token2Lemma,
                                        factor, tfidf):
        """ Returns similarity between two tokens using edit distance and TFIDF weighting"""
        tokenLemma = self.wordnet_lemmatizer.lemmatize(token)
        similarity = 0.0
        if edit_distance(tokenLemma, token2Lemma) == 0:
            factor = factor * 10
        dist = edit_distance(tokenLemma, token2Lemma)
        similarity = factor * (float(1) / float(1 + dist))
        tfidf_score = 0.0
        if token in tfidf:
            tfidf_score = tfidf[token]
        if token.encode("utf-8") in tfidf:
            tfidf_score = tfidf[token.encode("utf-8")]
        tfidf_score = max(tfidf_score, 0.0001)
        similarity = similarity * tfidf_score
        return similarity

    def lookup_google(self, params):
        """ Lookup in Google Search"""
        #curl "https://kgsearch.googleapis.com/v1/entities:search?query=bebe&key=<key>&limit=2&indent=True&types=Organization"
        url = self.google_service_url + '?' + urllib.urlencode(params)
        #result score = an indicator of how well the entity matched the request constraints.
        response = json.loads(urllib.urlopen(url).read())
        results = []
        if "itemListElement" in response:
            for element in response['itemListElement']:
                dict_result = {}
                if "resultScore" in element:
                    dict_result["resultScore"] = element['resultScore']
                if "result" in element:
                    if "detailedDescription" in element["result"]:
                        dict_result["detailedDescription"] = element["result"][
                            'detailedDescription']
                    if "description" in element["result"]:
                        dict_result["description"] = element["result"][
                            'description']
                    if "url" in element["result"]:
                        dict_result["url"] = element["result"]["url"]
                results.append(dict_result)
        return results

    def rank_google_result_company(self, results):
        """ Binary rank  of google search results"""
        for result in results:
            for keyword in self.brands_keywords_google:
                if "detailedDescription" in result:
                    if keyword in result["detailedDescription"]:
                        return 1
                if "description" in result:
                    if keyword in result["description"]:
                        return 1
        return 0.0

    def rank_google_result_material(self, results):
        """ Binary rank of google search results"""
        for result in results:
            for keyword in self.materials_keywords_google:
                # guard against missing keys, as the company variant above does
                if keyword in result.get("detailedDescription", "") \
                        or keyword in result.get("description", ""):
                    return 1
        return 0.0

    def rank_probase_result_company(self, result):
        """Probase probability ranking [0,1]"""
        keywords = filter(lambda x: x in result, self.probase_brands)
        keywords = map(lambda x: result[x], keywords)
        if len(keywords) > 0:
            return 1 + max(keywords)
        else:
            return 0.5

    def rank_probase_result_material(self, result):
        """Probase probability ranking [0,1]"""
        keywords = filter(lambda x: x in result, self.probase_materials)
        keywords = map(lambda x: result[x], keywords)
        if len(keywords) > 0:
            return 1 + max(keywords)
        else:
            return 0.5

    def lookup_probase(self, params):
        """Probase lookup"""
        #curl "https://concept.research.microsoft.com/api/Concept/ScoreByProb?instance=adidas&topK=10"
        url = self.probase_service_url + '?' + urllib.urlencode(params)
        response = json.loads(urllib.urlopen(url).read())
        return response

    def get_liketoknowitlinks(self, tokens):
        """ Extract liketoknowit links"""
        links = []
        for token in tokens:
            match = re.search(r"http://liketk.it/([^\s]+)", token)
            if match is not None:
                link = match.group(0)
                links.append(link)
        return links

    def lda_topic_models(self, num_topics, num_iter, min_occ, docs):
        """ Extract LDA topic models """
        cvectorizer = CountVectorizer(min_df=min_occ, stop_words="english")
        cvz = cvectorizer.fit_transform(docs)
        lda_model = lda.LDA(n_topics=num_topics, n_iter=num_iter)
        X_topics = lda_model.fit_transform(cvz)
        _lda_keys = []
        for i in xrange(X_topics.shape[0]):
            _lda_keys.append(X_topics[i].argmax())
        topic_summaries = []
        topic_word = lda_model.topic_word_  # all topic words
        n_top_words = 5
        vocab = cvectorizer.get_feature_names()
        for i, topic_dist in enumerate(topic_word):
            topic_words = np.array(vocab)[np.argsort(
                topic_dist)][:-(n_top_words + 1):-1]  # get!
            topic_summaries.append(' '.join(topic_words))
        return topic_summaries

    def get_top_num(self, coll, num):
        """ Extract top 10 ranked items"""
        top, counts = zip(*Counter(coll).most_common(num))
        return list(top)

    def get_wikipedia_vote(self, query):
        """ Wikipedia lookup binary rank"""
        pages = wikipedia.search(query)
        for pageName in pages:
            try:
                page = wikipedia.page(pageName)
                content = page.content.lower()
                for keyword in self.brands_keywords_google:
                    if keyword in content:
                        return 1
            except Exception:
                return 0.0
        return 0.0

    def get_google_search_vote(self, query):
        """ Google search lookup binary rank"""
        try:
            response = GoogleSearch().search(query)
            for result in response.results:
                text = result.getText().lower()
                title = result.title.lower()
                for keyword in self.brands_keywords_google:
                    if keyword in text or keyword in title:
                        return 1
        except Exception:
            return 0
        return 0

    def emoji_classification(self, emojis, num):
        """ Emoji classification """
        items = {}
        for item in self.items_lemmas.keys():
            items[item] = 0.0
        for emoji in emojis:
            item_matches = self.emoji_to_item(emoji)
            for item_m in item_matches:
                items[item_m] = items[item_m] + 1
        top = sorted([(k, v) for k, v in items.iteritems()],
                     reverse=True,
                     key=lambda x: x[1])[:num]
        return top

    def emoji_to_item(self, token):
        """Classify item based on emojis"""
        if token == u"👕":
            return ["shirt", "top"]
        if token == u"👖":
            return ["jean", "trouser", "legging", "jogger"]
        if token == u"👗":
            return ["dress"]
        if token == u"👚":
            return ["blouse", "shirt"]
        if token == u"👛":
            ["purse", "bag", "handbag"]
        if token == u"👜":
            return ["bag", "handbag"]
        if token == u"👝" or token == u"🎒 ":
            return ["bag"]
        if token == u"👞":
            return ["shoe", "boot"]
        if token == u"👟":
            return ["trainer", "shoe", "boot"]
        if token == u"👠" or token == u"👡 " or token == u"👢":
            return ["heel", "shoe"]
        if token == u"👒" or token == u"🎩":
            return ["hat"]
        return []

    def map_candidates_to_ontology(self, candidates):
        """ Map candidates from external APIs to our classes"""
        topic = map(lambda x: x.decode('utf-8', 'ignore').encode("utf-8"),
                    self.top_category_items)
        freq_scores = {}
        for x in topic:
            parts = x.split(",")
            label = parts[0]
            freq_scores[label] = 0.0
        for token in candidates:
            for x in topic:
                parts = x.split(",")
                label = parts[0]
                words = parts[1].split(" ")
                acc_sim = 0
                scores = []
                for word in words:
                    token2 = word.lower()
                    token2Lemma = self.wordnet_lemmatizer.lemmatize(token2)
                    # token_similarity expects a TF-IDF dict as its last
                    # argument; none is available here, so pass an empty one
                    # (neutral weighting) to avoid a TypeError
                    similarity = self.token_similarity(token[0], token2,
                                                       token2Lemma,
                                                       self.CAPTION_FACTOR, {})
                    scores.append(similarity * math.pow(token[1], 2))

                acc_sim = acc_sim + max(scores)
                freq_scores[label] = freq_scores[label] + acc_sim
        return freq_scores

    def liketkit_classification(self, url):
        """ Liketkit link scraping """
        text = []
        try:
            driver = webdriver.PhantomJS()
            driver.get(url)
            p_element = driver.find_element_by_class_name("ltk-products")
            products = p_element.find_elements_by_xpath(".//*")
            urls = []
            for prod in products:
                urls.append(prod.get_attribute("href"))
            for url in urls:
                driver.get(url)
                html = driver.page_source
                soup = BeautifulSoup(html, "lxml")
                data = soup.findAll(text=True, recursive=True)
                text.extend(list(data))
            # return after visiting every product page (the original returned
            # inside the loop, after only the first one)
            return text
        except Exception:
            print("error in liketkit classification")
            return text

    def google_vision_lookup(self, imagePath):
        """ Google vision API lookup """
        item_candidates = []
        try:
            # Instantiates a client
            client = vision.ImageAnnotatorClient()

            # The name of the image file to annotate
            file_name = os.path.join(os.path.dirname(__file__), imagePath)

            # Loads the image into memory
            with io.open(file_name, 'rb') as image_file:
                content = image_file.read()

            image = types.Image(content=content)

            # Performs label detection on the image file
            response = client.label_detection(image=image)
            labels = response.label_annotations
            for label in labels:
                item_candidates.append((label.description, label.score))
            return item_candidates
        except Exception:
            print("error in google_vision_LF")
            return item_candidates

    def deep_detect_lookup(self, link):
        """ Deep detect local lookup"""
        items_and_fabrics = {}
        items_and_fabrics["items"] = []
        items_and_fabrics["fabrics"] = []
        try:
            parameters_input = {}
            parameters_mllib = {}
            parameters_output = {'best': 10}
            data = [link]
            clothing_res = self.dd.post_predict(self.sname_clothing, data,
                                                parameters_input,
                                                parameters_mllib,
                                                parameters_output)
            body = clothing_res[u"body"]
            predictions = body[u"predictions"]
            classes = predictions[0][u"classes"]
            for c in classes:
                items = c[u"cat"].strip(" ").split(",")
                prob = c[u"prob"]
                for item in items:
                    items_and_fabrics["items"].append((item, prob))

            bags_res = self.dd.post_predict(self.sname_bags, data,
                                            parameters_input, parameters_mllib,
                                            parameters_output)
            body = bags_res[u"body"]
            predictions = body[u"predictions"]
            classes = predictions[0][u"classes"]
            for c in classes:
                items = c[u"cat"].strip(" ").split(",")
                prob = c[u"prob"]
                for item in items:
                    items_and_fabrics["items"].append((item, 0.5 * prob))

            footwear_res = self.dd.post_predict(self.sname_footwear, data,
                                                parameters_input,
                                                parameters_mllib,
                                                parameters_output)
            body = footwear_res[u"body"]
            predictions = body[u"predictions"]
            classes = predictions[0][u"classes"]
            for c in classes:
                items = c[u"cat"].strip(" ").split(",")
                prob = c[u"prob"]
                for item in items:
                    items_and_fabrics["items"].append((item, 0.5 * prob))

            fabric_res = self.dd.post_predict(self.sname_fabric, data,
                                              parameters_input,
                                              parameters_mllib,
                                              parameters_output)
            body = fabric_res[u"body"]
            predictions = body[u"predictions"]
            classes = predictions[0][u"classes"]
            for c in classes:
                items = c[u"cat"].strip(" ").split(",")
                prob = c[u"prob"]
                for item in items:
                    items_and_fabrics["fabrics"].append((item, prob))
            return items_and_fabrics
        except Exception:
            print("error in deep_detect_LF")
            return items_and_fabrics

    def startup_deep_detect(self):
        """ Startup services for deep detect classification """
        self.dd.set_return_format(self.dd.RETURN_PYTHON)
        for model in self.deep_detect_models:
            m = {"repository": model["path"]}
            parameters_input = {
                'connector': 'image',
                'width': self.width,
                'height': self.height
            }
            # NOTE: the original always uses the clothing class count here,
            # even for the other models
            parameters_mllib = {'nclasses': self.nclasses_clothing}
            parameters_output = {}
            # pass the repository dict m, not the whole config entry
            self.dd.put_service(model["name"], m, model["description"],
                                self.mllib, parameters_input, parameters_mllib,
                                parameters_output)

    def deepomatic_lookup(self, link):
        """ Deepomatic API lookup """
        item_candidates = []
        try:
            client = Client(529372386976, self.conf["deepomatic_api_key"])
            task = client.helper.get("/detect/fashion/?url=" + link)
            taskid = task[u"task_id"]
            i = 0
            while i < 10:
                sleep(0.1)  #100ms
                res = client.helper.get("/tasks/" + str(taskid) + "/")
                task = res[u"task"]
                status = task[u"status"]
                if status == u"success" or status == "success":
                    data = task[u"data"]
                    boxes = data[u"boxes"]
                    for item in boxes.keys():
                        info = boxes[item]
                        probability = 0.0
                        for inf in info:
                            probability = probability + inf[u"proba"]
                        item_candidates.append(
                            (item.encode("utf-8"), probability))
                    i = 10
                else:
                    i += 1
            return item_candidates
        except Exception:
            print("error in deepomaticLF")
            return item_candidates

    def clarifai_lookup(self, link):
        """ Clarifai API lookup"""
        item_candidates = []
        try:
            app = ClarifaiApp(api_key=self.conf["clarifai_api_key"])
            model = app.models.get('apparel')
            image = ClImage(url=link)
            res = model.predict([image])
            outputs = res[u"outputs"]
            for output in outputs:
                data = output[u"data"]
                concepts = data[u"concepts"]
                for concept in concepts:
                    concept_parts = concept[u"name"].encode("utf-8").split(" ")
                    val = concept[u"value"]
                    for part in concept_parts:
                        item_candidates.append((part, val))

            return item_candidates
        except Exception:
            print("error in clarifai LF")
            return item_candidates

    def find_closest_semantic_hierarchy(self, caption, comments, tags,
                                        hashtags, topic, id, num):
        """ Finds num semantically closest candidates for a given topic with multiple words per topic"""
        topic = map(lambda x: x.decode('utf-8', 'ignore').encode("utf-8"),
                    topic)
        freq_scores = {}
        for x in topic:
            parts = x.split(",")
            label = parts[0]
            freq_scores[label] = 0.0
        # the same scoring loop, applied to each token source with its weight
        sources = [(caption, self.CAPTION_FACTOR),
                   (comments, self.COMMENTS_FACTOR),
                   (hashtags, self.HASHTAG_FACTOR),
                   (tags, self.USERTAG_FACTOR)]
        for tokens, factor in sources:
            for token in tokens:
                for x in topic:
                    parts = x.split(",")
                    label = parts[0]
                    words = parts[1].split(" ")
                    scores = []
                    for word in words:
                        token2 = word.lower()
                        token2Lemma = self.wordnet_lemmatizer.lemmatize(token2)
                        similarity = self.token_similarity(token, token2,
                                                           token2Lemma, factor,
                                                           self.tfidf[id])
                        scores.append(similarity)
                    # only the best-matching word of each topic entry counts;
                    # the original tags loop additionally summed every word
                    # similarity, which double-counted and is dropped here
                    freq_scores[label] += max(scores)
        top = sorted([(k, v) for k, v in freq_scores.iteritems()],
                     reverse=True,
                     key=lambda x: x[1])[:num]
        return top

    def find_closest_syntactic_hierarchy(self, caption, comments, tags,
                                         hashtags, topic, id, num):
        """ Finds num syntactically closest candidates for a given topic, with multiple words per topic"""
        topic = map(lambda x: x.decode('utf-8', 'ignore').encode("utf-8"),
                    topic)
        freq_scores = {}
        for x in topic:
            parts = x.split(",")
            label = parts[0]
            freq_scores[label] = 0.0
        # the same scoring loop, applied to each token source with its weight
        sources = [(caption, self.CAPTION_FACTOR),
                   (comments, self.COMMENTS_FACTOR),
                   (hashtags, self.HASHTAG_FACTOR),
                   (tags, self.USERTAG_FACTOR)]
        for tokens, factor in sources:
            for token in tokens:
                for x in topic:
                    parts = x.split(",")
                    label = parts[0]
                    words = parts[1].split(" ")
                    scores = []
                    for word in words:
                        token2 = word.lower()
                        token2Lemma = self.wordnet_lemmatizer.lemmatize(token2)
                        similarity = self.token_similarity_syntactic_only(
                            token, token2, token2Lemma, factor, self.tfidf[id])
                        scores.append(similarity)
                    # as above, the extra per-word accumulation in the original
                    # tags loop is dropped
                    freq_scores[label] += max(scores)
        top = sorted([(k, v) for k, v in freq_scores.iteritems()],
                     reverse=True,
                     key=lambda x: x[1])[:num]
        return top
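
A small illustration of the emoji scoring path (hedged; the output shown is purely illustrative), assuming an InformationExtractor instance ie has been constructed with its domain lists:

top_items = ie.emoji_classification([u"👗", u"👠"], 3)
# e.g. [('dress', 1.0), ('heel', 1.0), ('shoe', 1.0)]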
Example #10
    def __init__(self,
                 sname,
                 host="127.0.0.1",
                 port="8080",
                 proto=0,
                 api_path="",
                 model=None,
                 models=[],
                 models_dir="/opt/platform/models/",
                 datafiles=[],
                 datadir="",
                 output_dir="/temp/predictions/",
                 columns=[],
                 target_cols=[],
                 ignored_cols=[],
                 offset=50,
                 gpuid=0,
                 autoregressive=False,
                 sam=False,
                 batch_size=50,
                 iter_size=1,
                 iterations=500000,
                 base_lr=0.001,
                 test_interval=5000,
                 anomaly_params=AnomalyParameters(),
                 display_progress=True):

        self.sname = sname
        self.models = models
        self.models_dir = models_dir
        self.datafiles = datafiles
        self.datadir = datadir
        self.output_dir = output_dir
        self.columns = columns
        self.target_cols = target_cols
        self.ignored_cols = ignored_cols
        self.offset = offset
        self.batch_size = batch_size
        self.gpuid = gpuid
        self.autoregressive = autoregressive
        self.display_progress = display_progress

        self.solver_params = {
            "iter_size": iter_size,
            "iterations": iterations,
            "base_lr": base_lr,
            "test_interval": test_interval,
            "sam": sam
        }

        """
        shift: How much the target is being shifted. As models predict
        at different horizons, shift enable to compare the same sections
        of the targets even if the target is shifted by a certain number
        of timesteps.
        """
        self.shift = 0

        # error based anomaly detection
        self.anomaly_params = anomaly_params
        self.anomaly_params.labels = self.target_cols

        # dict {dataset: target}
        self.targs = {}
        # dict { dataset: {model: preds / errors}}
        self.preds = {}
        # signed error
        self.errors = {}

        self.dd = DD(host, port, proto, api_path)
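
The enclosing class name is not shown; assuming something like TimeseriesPredictor (a hypothetical placeholder), a minimal instantiation could be:

# TimeseriesPredictor is a hypothetical name for the class defined above
predictor = TimeseriesPredictor('ts-pred',
                                models=['lstm-small'],
                                datafiles=['signal.csv'],
                                columns=['t', 'value'],
                                target_cols=['value'])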
Example #11
parser.add_argument("--confidences",type=str,help="whether to output the confidence map, e.g. best",default='')
args = parser.parse_args()

host = 'localhost'
port = 8080
sname = 'segserv'
description = 'image segmentation'
mllib = args.mllib
if mllib == 'caffe':
    mltype = 'unsupervised'
else:
    mltype = 'supervised'
nclasses = args.nclasses
width = args.width
height = args.height
dd = DD(host,port)
dd.set_return_format(dd.RETURN_PYTHON)

def random_color():
    ''' generate rgb using a list comprehension '''
    r, g, b = [random.randint(0,255) for i in range(3)]
    return [r, g, b]

# creating ML service
model_repo = args.model_dir
if not model_repo:
    model_repo = os.getcwd() + '/model/'
model = {'repository':model_repo}
parameters_input = {'connector':'image','width':width,'height':height}
parameters_mllib = {'nclasses':nclasses,'segmentation':True,'gpu':True,'gpuid':0}
parameters_output = {}
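
The fragment ends before the service is created; judging from the other segmentation examples in this collection, it presumably continues roughly as follows (a sketch, not the original continuation):

try:
    dd.put_service(sname, model, description, mllib,
                   parameters_input, parameters_mllib,
                   parameters_output, mltype)
except Exception:  # the service may already exist
    pass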
Example #12
parser.add_argument("--nclasses",help="number of classes",type=int,default=150)
parser.add_argument("--width",help="image width",type=int,default=480)
parser.add_argument("--height",help="image height",type=int,default=480)
parser.add_argument("--model-dir",help="model directory")
args = parser.parse_args()

host = 'localhost'
port = 8080
sname = 'segserv'
description = 'image segmentation'
mllib = 'caffe'
mltype = 'unsupervised'
nclasses = args.nclasses
width = args.width
height = args.height
dd = DD(host,port)
dd.set_return_format(dd.RETURN_PYTHON)

def random_color():
    ''' generate rgb using a list comprehension '''
    r, g, b = [random.randint(0,255) for i in range(3)]
    return [r, g, b]

# creating ML service
model_repo = args.model_dir
if not model_repo:
    model_repo = os.getcwd() + '/model/'
model = {'repository':model_repo}
parameters_input = {'connector':'image','width':width,'height':height}
parameters_mllib = {'nclasses':nclasses}
parameters_output = {}
Example #13
import sys
import json
from dd_client import DD

with open('config.json', 'r') as f:
    config = json.load(f)

service = sys.argv[1]

MODEL_REPO = config['REPO'][service]['PATH']
nclasses = config['REPO'][service]['CLASS_COUNT']
height = width = config['REPO'][service]['IMAGE_SIZE']

# setting up DD client
host = 'localhost'
sname = config['REPO'][service]['NAME']
description = config['REPO'][service]['DESCRIPTION']
mllib = config['REPO'][service]['LIBRARY']
dd = DD(host)
dd.set_return_format(dd.RETURN_PYTHON)

# creating ML service
model = {'repository': MODEL_REPO}
parameters_input = {'connector': 'image', 'width': width, 'height': height}
parameters_mllib = {'nclasses': nclasses}
parameters_output = {}

dd.put_service(sname, model, description, mllib, parameters_input,
               parameters_mllib, parameters_output)
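
With the service in place, a prediction call against it might look like this (hedged; the image path is a placeholder, and 'best' limits the number of returned classes as in the other examples here):

data = ['/data/imgs/001.jpg']
res = dd.post_predict(sname, data, {}, {}, {'best': 3})
print(res['body']['predictions'])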
Example #14
    'footwear': {'backend':'caffe','nclasses':51,'width':224,'height':224,'path':base_path_caffe},
    'sports': {'backend':'caffe','nclasses':143,'width':224,'height':224,'path':base_path_caffe},
    'furnitures': {'backend':'caffe','nclasses':179,'width':224,'height':224,'path':base_path_caffe},
}

parser = argparse.ArgumentParser()
parser.add_argument('--host',help='AMI public IP')
parser.add_argument('--model-name',help='model name, e.g. googlenet, resnet_50, age_model, gender, clothing, see https://deepdetect.com/products/ami/ for full list',default='googlenet')
parser.add_argument('--info',help='simple info call to remote DeepDetect server',action='store_true')
parser.add_argument('--create-service',help='whether to create service',action='store_true')
parser.add_argument('--delete',help='whether to delete the service',action='store_true')
parser.add_argument('--img-url',help='URL of image to classify')
args = parser.parse_args()

host = args.host
dd = DD(host,8080)
dd.set_return_format(dd.RETURN_PYTHON)

if args.model_name not in models_config:
    print('Unknown model =', args.model_name)
    sys.exit()

model_config = models_config[args.model_name]

# info call
if args.info:
    info = dd.info()
    print(info)
    sys.exit()

if args.delete:
Example #15
parser.add_argument('--port',help='server port',type=int,default=8080)
parser.add_argument('--sname',help='service name')
parser.add_argument('--img-width',help='image width',type=int,default=224)
parser.add_argument('--img-height',help='image height',type=int,default=224)
parser.add_argument('--gpu',help='whether to bench GPU',action='store_true')
parser.add_argument('--cpu',help='whether to bench CPU',action='store_true')
parser.add_argument('--remote-bench-data-dir',help='when bench data directory, when available remotely on the server')
parser.add_argument('--max-batch-size',help='max batch size to be tested',type=int,default=256)
parser.add_argument('--list-bench-files',help='file holding the list of bench files',default='list_bench_files.txt')
parser.add_argument('--npasses',help='number of passes for every batch size',type=int,default=5)
parser.add_argument('--detection',help='whether benching a detection model',action='store_true')
args = parser.parse_args()

host = args.host
port = args.port
dd = DD(host,port)
dd.set_return_format(dd.RETURN_PYTHON)

list_bench_files = []
with open(args.list_bench_files) as f:
    for l in f:
        list_bench_files.append(args.remote_bench_data_dir + '/' + l.rstrip())
init_batch_size = 1
batch_sizes = []
l = init_batch_size
while l <= args.max_batch_size:
    batch_sizes.append(l)
    if l < 32:
        l = l * 2
    else:
        l += 16
Example #16
class ModelTrainer:
    """ Prediction Model trainer class
        binary char-based model training class
    """
    def __init__(self,structure,logger,config):
        """ Instanciate a model trainer
        :param dic structure: Model Trainer specific settings
            eg: {"model-repo":"../models/mymodel","training-repo":"../training/mytraining","sname":"MyTrainer","test_split":0.01,"base-lr":0.01,"clevel":False,"sequence":140,"iterations":50000,"test_interval":1000,"stepsize":15000,"destroy":True,"resume":False,"finetune":False,"weights":"","nclasses":2,"documents":True,"batch-size":128,"test-batch-size":16,"gpuid":0,"mllib":"xgboost","lregression":False}
            *model-repo* location of the model
            *training-repo* location of the training files
            *sname* service name
            *test_plit* training split between 0 and < 1,type=float,default=0.01
            *base_lr* initial learning rate,default=0.01,type=float
            *clevel* character-level convolutional net,type=boolean
            *sequence* sequence length for character level models,default=140,type=int
            *iterations* number of iterations,default=50000,type=int
            *test_interval* test interval',default=1000,type=int
            *stepsize* lr policy stepsize',default=15000,type=int
            *destroy* whether to destroy model',type=boolean
            *resume* whether to resume training,type=boolean
            *finetune* whether to finetune,type=boolean
            *weights* pre-trained weight file, when finetuning
            *nclasses* number of classes,type=int,default=2
            *documents* whether to train from text documents (as opposed to sentences in one doc),type=boolean
            *batch_size* batch size,type=int,default=128
            *test_batch_size* test batch size,type=int,default=16
            *gpu* enable gpu usage is True, default=False
            *gpuid* specify gpu id,type=int,default=0
            *mllib* caffe or xgboost,default='caffe'
            *lregression* whether to use logistic regression,type=boolean
        :param obj logger: DFM logger object
        :param obj config: DFM global config object
        :returns: ModelTrainer object (instance of a modeltrainer class)
        """
        self.config=config
        self.structure=structure
        self.logger=logger
        self.nclasses = self.structure['nclasses']
        self.description = 'classifier'
        self.sname=self.structure['sname']
        self.mllib = self.structure['mllib']
        self.dd = DD(config['DEEP_DETECT_URI'],config['DEEP_DETECT_PORT'])
        self.dd.set_return_format(self.dd.RETURN_PYTHON)

    def createMLTrainerService(self):
        """ Create ML Trainer service in DeepDetect """
        if self.structure['lregression']:
            self.template = 'lregression'
        else:
            self.template = 'mlp'
            layers = [800,500,200]
        if self.structure['clevel']:
            self.template = 'convnet'
            layers = ['1CR256','1CR256','4CR256','1024','1024']
        self.model = {'templates':'../templates/caffe/','repository':self.structure['model-repo']}
        self.parameters_input = {'connector':'txt','sentences':False,'characters':self.structure['clevel'],'read_forward':True}
        if self.structure['documents']:
            self.parameters_input['sentences'] = False
        if self.structure['clevel']:
            self.parameters_input['sequence'] = self.structure['sequence']
            #parameters_input['alphabet'] = 'abcdef0123456789'  # hex
        #    parameters_input['alphabet'] = '_-,:?/.(){}*%0123456789abcdefghijklmnopqrstuvwxyz' # opcode
            #parameters_input['alphabet'] = "abcdefghijklmnopqrstuvwxyz0123456789,;.!?'"#\"/\\|_@#$%^&*~`+-=<>"
        self.parameters_mllib = {'template':self.template,'nclasses':self.nclasses,'db':True,'dropout':0.5}
        if self.mllib == 'xgboost':
            self.parameters_mllib['db'] = False
        if not self.template == 'lregression':
            self.parameters_mllib['layers'] = layers
        #parameters_mllib = {'nclasses':nclasses,'db':True}
        if self.structure['finetune']:
            self.parameters_mllib['finetuning'] = True
            if not self.structure['weights']:
                self.logger.error('Finetuning requires a weights file')  # server will fail on service creation anyway
            else:
                self.parameters_mllib['weights'] = self.structure['weights']
        self.parameters_output = {}
        self.logger.debug("dd.put_service("+str(self.structure['sname'])+","+str(self.model)+","+str(self.description)+","+str(self.mllib)+","+str(self.parameters_input)+","+str(self.parameters_mllib)+","+str(self.parameters_output)+")")
        return self.dd.put_service(self.structure['sname'],self.model,self.description,self.mllib,self.parameters_input,self.parameters_mllib,self.parameters_output)

    def trainModel(self):
        """ Train the model. """
        self.train_data = [self.structure['training-repo']]
        self.parameters_input = {'test_split':self.structure['test_split'],'shuffle':True,'db':True}
        if not self.structure['clevel']:
            self.parameters_input['min_word_length'] = 5
            self.parameters_input['min_count'] = 10
            self.parameters_input['count'] = False
            if self.mllib == 'xgboost':
                self.parameters_input['tfidf'] =  True
                self.parameters_input['db'] = False
        else:
            self.parameters_input['sentences'] = True
            self.parameters_input['characters'] = True
            self.parameters_input['sequence'] = self.structure['sequence']
        if self.structure['documents']:
            self.parameters_input['sentences'] = False
        if self.mllib == 'caffe':
            self.parameters_input['db']=True
            self.parameters_mllib = {
             'gpu':self.structure['gpu'],
             'gpuid':self.structure['gpuid'],
             'resume':self.structure['resume'],
             'net':{
              'batch_size':self.structure['batch_size']
             },
             'solver':{
              'test_interval':self.structure['test_interval'],
              'test_initialization':False,
              'base_lr':self.structure['base_lr'],
              'solver_type':'ADAM',
              'iterations':self.structure['iterations']
             }
            }#,'lr_policy':'step','stepsize':self.structure['stepsize'],'gamma':0.5,'weight_decay':0.0001}}
        elif self.mllib == 'xgboost':
            self.parameters_mllib = {
              'iterations':self.structure['iterations'],
              'objective':'multi:softprob',
              'booster_params':{'max_depth':50}
             }
        self.parameters_output = {'measure':['mcll','f1','cmdiag','cmfull']}
        if self.nclasses == 2:
            self.parameters_output['measure'].append('auc')
        self.logger.debug("dd.post_train("+self.structure['sname']+","+str(self.train_data)+","+str(self.parameters_input)+","+str(self.parameters_mllib)+","+str(self.parameters_output)+",async="+str(True)+")")
        self.dd.post_train(self.structure['sname'],self.train_data,self.parameters_input,self.parameters_mllib,self.parameters_output,async=True)
        time.sleep(1)
        train_status = ''
        while True:
            train_status = self.dd.get_train(self.sname,job=1,timeout=10)
            if train_status['head']['status'] == 'running':
                self.logger.debug(train_status['body']['measure'])
            else:
                self.logger.debug(train_status)
                break
        return train_status

    def clearMLTrainerService(self,clear=''):
        """ delete the service, keeping the model

        :param str clear: use clear='lib' to clear the model as well, default empty.
        :returns: DeepDetect delete result
        """
        return self.dd.delete_service(self.sname,clear=clear)
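
A minimal usage sketch for the trainer above (not part of the original snippet): the config values, logger and structure keys are illustrative assumptions chosen to match the attribute accesses in the class; note that post_train is called with async=True, which requires an older dd_client and a Python version below 3.7 (async became a reserved keyword).

import logging

# illustrative assumptions: adapt the server location and repositories to your own setup
config = {'DEEP_DETECT_URI': 'localhost', 'DEEP_DETECT_PORT': 8080}
logger = logging.getLogger('dfm')
structure = {'model-repo': '../models/mymodel', 'training-repo': '../training/mytraining',
             'sname': 'MyTrainer', 'test_split': 0.01, 'base_lr': 0.01, 'clevel': False,
             'sequence': 140, 'iterations': 50000, 'test_interval': 1000, 'stepsize': 15000,
             'destroy': True, 'resume': False, 'finetune': False, 'weights': '', 'nclasses': 2,
             'documents': True, 'batch_size': 128, 'test_batch_size': 16, 'gpu': False,
             'gpuid': 0, 'mllib': 'xgboost', 'lregression': False}

trainer = ModelTrainer(structure, logger, config)
trainer.createMLTrainerService()   # PUT /services on the DeepDetect server
status = trainer.trainModel()      # launches training and polls until it stops
trainer.clearMLTrainerService()    # removes the service, keeps the model files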
Exemple #17
0
import cv2
from dd_client import DD
import numpy as np
import argparse

host = 'localhost'
port = 8080
dd = DD(host, port)
dd.set_return_format(dd.RETURN_PYTHON)

parser = argparse.ArgumentParser()
parser.add_argument(
    '--model-in-path',
    help='directory path that contains model to export (i.e. the .pt file)',
    required=True)
parser.add_argument('--img-size',
                    default=256,
                    type=int,
                    help='square image size')
parser.add_argument('--img-in', help='image to transform', required=True)
parser.add_argument('--img-out', help='transformed image', required=True)
parser.add_argument('--gpu', help='whether to run on GPU', action='store_true')
args = parser.parse_args()

# service creation call
model = {'repository': args.model_in_path}
parameters_input = {
    'connector': 'image',
    'width': args.img_size,
    'height': args.img_size
}
Exemple #18
0
class DNNFeatureExtractor(FeatureGenerator):
    def __init__(self,
                 dnnmodel,
                 image_files,
                 index_repo,
                 batch_size=32,
                 dd_host='localhost',
                 dd_port=8080,
                 dd_description='image classification',
                 meta_in='',
                 meta_out='',
                 captions_in='',
                 captions_out='',
                 mapi_in='',
                 mapi_out=''):
        self.dd_host = dd_host
        self.dd_port = dd_port
        self.dd_description = dd_description
        self.dd_mllib = 'caffe'
        self.meta_in = meta_in
        self.meta_out = meta_out
        self.captions_in = captions_in
        self.captions_out = captions_out
        self.mapi_in = mapi_in
        self.mapi_out = mapi_out
        self.gpuid = 0
        self.dnnmodel = dnnmodel
        if self.dnnmodel.extract_layer:
            self.dd_mltype = 'unsupervised'
        else:
            self.dd_mltype = 'supervised'
        self.image_files = image_files
        self.batch_size = batch_size
        self.binarized = False
        self.dd = DD(self.dd_host, self.dd_port)
        self.dd.set_return_format(self.dd.RETURN_PYTHON)
        self.index_repo = index_repo + '/' + self.dnnmodel.name
        try:
            os.mkdir(self.index_repo)
        except OSError:
            #logger.warning('directory ' + self.index_repo + ' may already exist')
            pass
        self.st = {}  # shelve used for full tags storage
        self.stm = {}  # in memory tmp storage
        if self.dd_mltype == 'supervised':
            self.st = shelve.open(self.index_repo + '/tags.bin')
        self.delete_dd_service()

    def __del__(self):
        if self.dd_mltype == 'supervised':
            for i, t in self.stm.items():
                self.st[i] = t
            self.st.close()

    def create_dd_service(self):
        model = {'repository': self.dnnmodel.model_repo}
        parameters_input = {
            'connector': 'image',
            'width': self.dnnmodel.img_width,
            'height': self.dnnmodel.img_height
        }
        parameters_mllib = {
            'nclasses': self.dnnmodel.nclasses,
            'gpu': True,
            'gpuid': self.gpuid
        }
        parameters_output = {}
        screate = self.dd.put_service(self.dnnmodel.name, model,
                                      self.dd_description, self.dd_mllib,
                                      parameters_input, parameters_mllib,
                                      parameters_output, self.dd_mltype)
        outcode = screate['status']['code']
        if outcode != 201 and outcode != 403:
            logger.error('failed creation of DNN service ' +
                         self.dnnmodel.name)
            #return
            raise Exception('failed creating DNN service ' +
                            self.dnnmodel.name)
        return

    def delete_dd_service(self):
        self.dd.delete_service(self.dnnmodel.name, clear='')

    def preproc(self):
        # none needed with dd at the moment
        return

    def index(self):
        ## feature generation, to be indexed or searched for
        self.create_dd_service()
        feature_vectors = []
        uris = []
        parameters_input = {}
        parameters_mllib = {
            'gpu': True,
            'gpuid': self.gpuid,
            'extract_layer': self.dnnmodel.extract_layer
        }

        if self.dd_mltype == 'unsupervised':
            parameters_output = {'binarized': self.binarized}
            # pass one image to get the size of the output layer
            classif = self.dd.post_predict(self.dnnmodel.name,
                                           [self.image_files[0]],
                                           parameters_input, parameters_mllib,
                                           parameters_output)
            response_code = classif['status']['code']
            if response_code != 200:
                print('response=', classif)
                logger.error(
                    'failed (index) initial prediction call to model ' +
                    self.dnnmodel.name + ' via dd')
                self.delete_dd_service()
                return
            dim = len(classif['body']['predictions']['vals'])
        else:
            parameters_output = {'best': self.dnnmodel.best}
            dim = self.dnnmodel.nclasses

        c = 0
        logger.info('dnn feature prediction and indexing for service ' +
                    self.dnnmodel.name + ' with layer of size ' + str(dim))
        with Indexer(dim, self.index_repo) as indexer:
            for x in batch(self.image_files, self.batch_size):
                classif = self.dd.post_predict(self.dnnmodel.name, x,
                                               parameters_input,
                                               parameters_mllib,
                                               parameters_output)
                #print classif
                response_code = classif['status']['code']
                if response_code != 200:
                    print('response=', classif)
                    logger.error(
                        'failed (index) batch prediction call to model ' +
                        self.dnnmodel.name + ' via dd')
                    continue
                predictions = classif['body']['predictions']
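                # a single-image batch comes back as one prediction dict instead of a list, so normalize it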
                if self.batch_size == 1 or len(self.image_files) == 1:
                    predictions = [predictions]
                for p in predictions:
                    if self.dd_mltype == 'unsupervised':
                        indexer.index_single(c, p['vals'], p['uri'])
                        if c > 0 and c % self.batch_size == 0:
                            logger.info('indexed ' + str(c) + ' images')
                    else:
                        puri = str(p['uri'])
                        indexer.index_tags_single(p['classes'], p['uri'])
                        self.stm[puri] = []
                        for pc in p['classes']:
                            self.stm[puri].append(pc['cat'])
                    c = c + 1

            indexer.build_index()
            indexer.save_index()
        logger.info('indexed a total of ' + str(c) + ' images')
        self.delete_dd_service()

    def search(self, jdataout={}):
        self.create_dd_service()
        parameters_input = {}
        parameters_mllib = {
            'gpu': True,
            'gpuid': self.gpuid,
            'extract_layer': self.dnnmodel.extract_layer
        }

        if self.dd_mltype == 'unsupervised':
            parameters_output = {'binarized': self.binarized}
        else:
            parameters_output = {'best': self.dnnmodel.best}

        logger.info('dnn feature prediction and searching for service ' +
                    self.dnnmodel.name)
        results = {}
        with Searcher(self.index_repo, search_size=500) as searcher:
            searcher.load_index()
            for x in batch(self.image_files, self.batch_size):
                classif = self.dd.post_predict(self.dnnmodel.name, x,
                                               parameters_input,
                                               parameters_mllib,
                                               parameters_output)
                response_code = classif['status']['code']
                if response_code != 200:
                    print('response=', classif)
                    logger.error(
                        'failed batch (search) prediction call to model ' +
                        self.dnnmodel.name + ' via dd')
                    self.delete_dd_service()
                    print(classif)
                    raise Exception(
                        'failed batch (search) prediction call to model ' +
                        self.dnnmodel.name)
                predictions = classif['body']['predictions']
                if self.batch_size == 1 or len(self.image_files) == 1:
                    predictions = [predictions]
                #print 'predictions=',predictions
                for p in predictions:
                    if self.dd_mltype == 'unsupervised':
                        nns = searcher.search_single(p['vals'], p['uri'])
                    else:
                        puri = str(p['uri'])
                        nns = searcher.search_tags_single(p['classes'], puri)
                        nns['tags_out_all'] = []
                        for nn in nns['nns_uris']:
                            nns['tags_out_all'].append(self.st[str(nn)])
                    results[p['uri']] = nns

        self.delete_dd_service()
        return self.to_json(results, '/img/reuters/', '/img/tate/',
                            self.dnnmodel.name, self.dnnmodel.description,
                            jdataout, self.meta_in, self.meta_out,
                            self.captions_in, self.captions_out, self.mapi_in,
                            self.mapi_out)
Exemple #19
0
parser.add_argument("--port", help="DeepDetect port", type=int, default=8080)
parser.add_argument("--confidence-threshold",
                    help="keep detections with confidence above threshold",
                    type=float,
                    default=0.1)
parser.add_argument("--save-path", help="Where to save resulting image")
args = parser.parse_args()

host = 'localhost'
sname = 'imgserv'
description = 'image classification'
mllib = 'caffe'
mltype = 'supervised'
nclasses = 21
width = height = 300
dd = DD(host, port=args.port)
dd.set_return_format(dd.RETURN_PYTHON)

# creating ML service
model_repo = os.getcwd() + '/model'
model = {'repository': model_repo}
parameters_input = {'connector': 'image', 'width': width, 'height': height}
parameters_mllib = {'nclasses': nclasses}
parameters_output = {}
dd.put_service(sname, model, description, mllib, parameters_input,
               parameters_mllib, parameters_output, mltype)

# chain call
calls = []

parameters_input = {"keep_orig": True}
Exemple #20
0
    small = cv2.resize(imgquery, dim)
    return small


host = 'localhost'
sname = 'imgserv'
description = 'image classification'
mllib = 'caffe'
mltype = 'unsupervised'
extract_layer = 'loss3/classifier'
#extract_layer = 'pool5/7x7_s1'
nclasses = 1000
layer_size = 1000  # default output code size
width = height = 224
binarized = False
dd = DD(host)
dd.set_return_format(dd.RETURN_PYTHON)
ntrees = 100
metric = 'angular'  # or 'euclidean'

# creating ML service
model_repo = os.getcwd() + '/model'
model = {'repository': model_repo, 'templates': '../templates/caffe/'}
parameters_input = {'connector': 'image', 'width': width, 'height': height}

# Only indexing needs the template.
if args.index:
    parameters_mllib = {'nclasses': nclasses, 'template': 'googlenet'}
else:
    parameters_mllib = {'nclasses': nclasses}
Exemple #21
0
    shutil.copy2('includes/dede_deploy.prototxt', 'dedemodel/deploy.prototxt')
if not os.path.exists('dedemodel/corresp.txt'):
    shutil.copy2('includes/corresp.txt', 'dedemodel/corresp.txt')
# remove old models
for root, dirs, files in os.walk('dedemodel'):
    for name in files:
        if name.lower().endswith('.caffemodel'):
            os.remove(os.path.join(root, name))
# copy new model
recentmodel = most_recent_iteration(args.builddir)
print('Using model ' + recentmodel)
shutil.copy2(os.path.join('builds', args.builddir, 'snapshots', recentmodel),
             'dedemodel/model.caffemodel')

# setup DeepDetect service if necessary
dd = DD('localhost')
dd.set_return_format(dd.RETURN_PYTHON)
model = {'repository': '/dockershare/ssd/dedemodel'}
parameters_input = {'connector': 'image', 'width': 512, 'height': 512}
parameters_mllib = {'nclasses': 7}
parameters_output = {}
detect = dd.delete_service('ssd')
detect = dd.put_service('ssd', model, 'single-shot detector', 'caffe',
                        parameters_input, parameters_mllib, parameters_output,
                        'supervised')

# recursively process input directory
for root, dirs, files in os.walk(folder_input):
    for name in sorted(files):
        name, ext = os.path.splitext(name)
        if (ext.lower().endswith(('.mp4', '.avi', '.mov'))
Exemple #22
0
from dd_client import DD
import matplotlib
import numpy as np
import time

import matplotlib.pyplot as plt
import pylab

model_repo = "/tmp"
host = 'localhost'
port = 8080
sname = 'test'
description = 'clustering'
mllib = 'tsne'
dd = DD(host)
dd.set_return_format(dd.RETURN_PYTHON)

training_repo = 'http://deepdetect.com/dd/datasets/mnist_csv/mnist_test.csv'

# service creation
model = {'repository':model_repo}
parameters_input = {'connector':'csv'}
parameters_mllib = {}
parameters_output = {}
dd.put_service(sname,model,description,mllib,
               parameters_input,parameters_mllib,parameters_output,'unsupervised')

# training
train_data = [training_repo]
parameters_input = {'id':'','separator':',','label':'label'}
parameters_mllib = {'iterations':500}
Exemple #23
0
import sys
import json
from dd_client import DD

with open('config.json', 'r') as f:
    config = json.load(f)

service = sys.argv[1]

# setting up DD client
host = 'localhost'
sname = config['REPO'][service]['NAME']
dd = DD(host)
dd.set_return_format(dd.RETURN_PYTHON)

dd.delete_service(sname, 'full')
Exemple #24
0
    help=
    "How many top predictions should be considered to chose the next token.")
parser.add_argument(
    "--temperature",
    type=float,
    default=1,
    help="Temperature of the predictions. The higher, the 'randomer'.")

args = parser.parse_args()

# dd global variables
sname = 'gpt-2'
description = 'Inference with GPT-2'
mllib = 'torch'

dd = DD(args.host, args.port)
dd.set_return_format(dd.RETURN_PYTHON)

# setting up the ML service
model = {'repository': args.repository}
parameters_input = {
    'connector': 'txt',
    'ordered_words': True,
    'wordpiece_tokens': True,
    'punctuation_tokens': True,
    'lower_case': False,
    'width': args.input_size
}
parameters_mllib = {'template': 'gpt2', 'gpu': True}
parameters_output = {}
dd.put_service(sname, model, description, mllib, parameters_input,
Exemple #25
0
def main():
    parser = argparse.ArgumentParser(description="DeepDetect benchmark tool")
    parser.add_argument("--host", help="server host", default="localhost")
    parser.add_argument("--port", help="server port", type=int, default=8080)
    parser.add_argument("--sname", help="service name")
    parser.add_argument("--img-width",
                        help="image width",
                        type=int,
                        default=224)
    parser.add_argument("--img-height",
                        help="image height",
                        type=int,
                        default=224)
    parser.add_argument("--bw",
                        help="whether images are bw",
                        action="store_true")
    parser.add_argument(
        "--histogram-equalization",
        "--eqhist",
        help="whether we apply an histogram equalization to images",
        action="store_true",
    )
    parser.add_argument("--gpu",
                        help="whether to bench GPU",
                        action="store_true")
    parser.add_argument("--gpuid", help="gpu id to use", type=int, default=0)
    parser.add_argument("--cpu",
                        help="whether to bench CPU",
                        action="store_true")
    parser.add_argument(
        "--remote-bench-data-dir",
        help="when bench data directory, when available remotely on the server",
    )
    parser.add_argument("--max-batch-size",
                        help="max batch size to be tested",
                        type=int,
                        default=256)
    parser.add_argument(
        "--max-workspace-size",
        help="max workspace size for tensort bench",
        type=int,
        default=1024,
    )
    parser.add_argument(
        "--list-bench-files",
        help="file holding the list of bench files",
        default="list_bench_files.txt",
    )
    parser.add_argument("--npasses",
                        help="number of passes for every batch size",
                        type=int,
                        default=5)
    parser.add_argument("--detection",
                        help="whether benching a detection model",
                        action="store_true")
    parser.add_argument(
        "--segmentation",
        help="whether benching a segmentation model",
        action="store_true",
    )
    parser.add_argument(
        "--regression",
        help="whether benching a regression model",
        action="store_true",
    )
    parser.add_argument(
        "--search",
        help="whether benching a similarity search service",
        action="store_true",
    )
    parser.add_argument(
        "--search-multibox",
        help="whether benching a multibox similarity search service",
        action="store_true",
    )
    parser.add_argument("--create",
                        help="model's folder name to create a service")
    parser.add_argument(
        "--nclasses",
        help="number of classes for service creation",
        type=int,
        default=1000,
    )
    parser.add_argument(
        "--auto-kill",
        help="auto kill the service after benchmarking",
        action="store_true",
    )
    parser.add_argument("--csv-output", help="CSV file output")
    parser.add_argument("--json-output", help="JSON file output")
    parser.add_argument("--mllib",
                        help="mllib to bench, ie [tensorrt|ncnn|caffe]",
                        default="caffe")
    parser.add_argument("--datatype",
                        help="datatype for tensorrt [fp16|fp32]",
                        default="fp32")
    parser.add_argument(
        "--recreate",
        help=
        "recreate service between every batchsize, useful for batch_size dependent precompiling backends (ie tensorRT)",
        action="store_true",
        default=False,
    )
    parser.add_argument("--dla",
                        help="use dla",
                        action="store_true",
                        default=False)
    parser.add_argument("--gpu-resize",
                        help="image resizing on gpu",
                        action="store_true",
                        default=False)
    parser.add_argument(
        "--image-interp",
        help="image interpolation method (nearest, linear, cubic, ...)",
    )
    args = parser.parse_args()

    host = args.host
    port = args.port
    dd = DD(host, port)
    dd.set_return_format(dd.RETURN_PYTHON)
    autokill = args.auto_kill

    def service_create(bs):
        # Create a service
        if args.create:
            description = "image classification service"
            mllib = args.mllib
            model = {"repository": args.create}
            parameters_input = {
                "connector": "image",
                "width": args.img_width,
                "height": args.img_height,
                "bw": args.bw,
                "histogram_equalization": args.histogram_equalization,
            }
            if args.segmentation:
                parameters_input["segmentation"] = True
            if args.regression:
                parameters_input["regression"] = True
            if args.dla:
                parameters_mllib = {
                    "nclasses": args.nclasses,
                    "datatype": args.datatype,
                    "readEngine": True,
                    "writeEngine": True,
                    "maxBatchSize": bs,
                    "dla": 0,
                    "maxWorkspaceSize": args.max_workspace_size,
                }
            else:
                parameters_mllib = {
                    "nclasses": args.nclasses,
                    "datatype": args.datatype,
                    "readEngine": True,
                    "writeEngine": True,
                    "maxBatchSize": bs,
                    "maxWorkspaceSize": args.max_workspace_size,
                }
            parameters_output = {}
            dd.put_service(
                args.sname,
                model,
                description,
                mllib,
                parameters_input,
                parameters_mllib,
                parameters_output,
            )
        else:
            pass

    out_json = []
    out_csv = None
    csv_writer = None
    if args.csv_output:
        out_csv = open(args.csv_output, "w+")
        csv_writer = csv.writer(out_csv)
        csv_writer.writerow(
            ["batch_size", "mean processing time", "mean time per img"])

    list_bench_files = []
    with open(args.list_bench_files) as f:
        for line in f:
            list_bench_files.append(args.remote_bench_data_dir + "/" +
                                    line.rstrip())
    batch_sizes = []
    batch_size = 1
    while batch_size <= args.max_batch_size:
        batch_sizes.append(batch_size)
        if batch_size < 32:
            batch_size = batch_size * 2
        else:
            batch_size += 16

    parameters_input = {}
    if args.image_interp:  # only set when an interpolation method was given
        parameters_input["interp"] = args.image_interp
    if args.gpu_resize:
        parameters_input["cuda"] = args.gpu_resize
    parameters_mllib = {"gpu": args.gpu, "gpuid": args.gpuid}
    parameters_output = {}
    if args.detection:
        parameters_output["confidence_threshold"] = 0.1
        if args.search or args.search_multibox:
            parameters_output["search"] = True
            parameters_output["rois"] = "rois"
            parameters_output["bbox"] = False
        else:
            parameters_output["bbox"] = True
        if args.search_multibox:
            parameters_output["multibox_rois"] = True
    elif args.segmentation:
        parameters_input["segmentation"] = True
    elif args.regression:
        parameters_output["regression"] = True
    elif args.search:
        parameters_output["search"] = True

    # First call to load model
    data = list_bench_files[:1]
    if not args.recreate:
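        # non-tensorrt backends can be created once with batch size 1; tensorrt precompiles its engine for a fixed max batch size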
        if not args.mllib == "tensorrt" or args.recreate:
            service_create(1)
        else:
            service_create(args.max_batch_size)
        classif = dd.post_predict(args.sname, data, parameters_input,
                                  parameters_mllib, parameters_output)

    for b in batch_sizes:
        data = list_bench_files[:b]
        fail = False
        if args.recreate:
            service_create(b)
            for i in range(5):
                classif = dd.post_predict(
                    args.sname,
                    data,
                    parameters_input,
                    parameters_mllib,
                    parameters_output,
                )
        mean_ptime = 0
        mean_ptime_per_img = 0
        for i in range(0, args.npasses + 1):
            print("testing batch size = %s" % len(data))
            classif = dd.post_predict(args.sname, data, parameters_input,
                                      parameters_mllib, parameters_output)
            if classif["status"]["code"] == 200:
                if i == 0:
                    continue  # skipping first pass so that the batch resize does not affect timing
                ptime = classif["head"]["time"]
                ptime_per_img = ptime / b
                mean_ptime += ptime
                mean_ptime_per_img += ptime_per_img
                print(
                    "pass %s batch size = %s / processing time = %s / time per image = %s"
                    % (i, b, ptime, ptime_per_img))
            else:
                print(classif["status"])
                # reload model
                data = list_bench_files[:1]
                classif = dd.post_predict(
                    args.sname,
                    data,
                    parameters_input,
                    parameters_mllib,
                    parameters_output,
                )
                fail = True
                break
        mean_processing_time = mean_ptime / args.npasses
        mean_time_per_img = mean_ptime_per_img / args.npasses
        print(
            ">>> batch size = %s / mean processing time = %s / mean time per image = %s / fps = %s / fail = %s"
            % (
                b,
                mean_ptime / args.npasses,
                mean_ptime_per_img / args.npasses,
                1000 / (mean_ptime_per_img / args.npasses),
                fail,
            ), )
        out_json.append({
            "batch_size": b,
            "mean_processing_time": mean_processing_time,
            "mean_time_per_img": mean_time_per_img,
        })
        if args.csv_output:
            csv_writer.writerow([b, mean_processing_time, mean_time_per_img])
        # break
        if args.recreate:
            dd.delete_service(args.sname)

    if args.json_output:
        with open(args.json_output, "w") as outfile:
            json.dump(out_json, outfile)

    if autokill:
        dd.delete_service(args.sname)
Exemple #27
0
                    help='whether benching a detection model',
                    action='store_true')
parser.add_argument('--create',
                    help='model\'s folder name to create a service')
parser.add_argument('--nclasses',
                    help='number of classes for service creation',
                    type=int,
                    default=1000)
parser.add_argument('--auto-kill',
                    help='auto kill the service after benchmarking',
                    action='store_true')
args = parser.parse_args()

host = args.host
port = args.port
dd = DD(host, port)
dd.set_return_format(dd.RETURN_PYTHON)
autokill = args.auto_kill

# Create a service
if args.create:
    description = 'image classification service'
    mllib = 'caffe'
    model = {'repository': args.create}
    parameters_input = {
        'connector': 'image',
        'width': args.img_width,
        'height': args.img_height
    }
    parameters_mllib = {'nclasses': args.nclasses}
    parameters_output = {}
Exemple #28
0
  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
"""

from dd_client import DD

HOST = 'localhost'
PORT = 8080

dd = DD(HOST, PORT)
dd.set_return_format(dd.RETURN_PYTHON)


def delete_dd_service(sname):
    dd.delete_service(sname, clear='')


# main
info = dd.info()

# in case there are remaining services, remove them
for s in info['head']['services']:
    sname = s['name']
    delete_dd_service(sname)