def upload_samples(self, module_id, samples_with_categories, sleep_if_throttled=True,
                   features_schema=None):
    url = self.endpoint + module_id + '/samples/'
    samples = []
    for i, s in enumerate(samples_with_categories):
        # multi-feature sample: a dict of feature name -> value
        if isinstance(s[0], dict):
            sample_dict = {"features": s[0]}
        elif isinstance(s[0], six.string_types):
            sample_dict = {"text": s[0]}
        else:
            raise MonkeyLearnException('The sample must be a text in sample ' + str(i))

        # category given by id(s) or by path(s); None means no category
        if (isinstance(s[1], int) or
                (isinstance(s[1], list) and all(isinstance(c, int) for c in s[1]))):
            sample_dict["category_id"] = s[1]
        elif (isinstance(s[1], six.string_types) or
                (isinstance(s[1], list) and all(isinstance(c, six.string_types) for c in s[1]))):
            sample_dict["category_path"] = s[1]
        elif s[1] is not None:
            raise MonkeyLearnException('Invalid category value in sample ' + str(i))

        # optional third element: tag (string or list of strings)
        if (len(s) > 2 and s[2] and
                (isinstance(s[2], six.string_types) or
                 (isinstance(s[2], list) and all(isinstance(c, six.string_types) for c in s[2])))):
            sample_dict['tag'] = s[2]
        samples.append(sample_dict)

    data = {
        'samples': samples
    }
    if features_schema:
        data['features_schema'] = features_schema
    response = self.make_request(url, 'POST', data, sleep_if_throttled)
    self.handle_errors(response)
    return MonkeyLearnResponse(response.json()['result'], [response])
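# Illustrative input for the upload_samples variant above; the texts, ids, category
# paths and tag below are placeholders, not real data. Each element is
# (text or features dict, category id(s) or path(s) or None, optional tag).
example_samples_with_categories = [
    ('the screen cracked after one week', 18),                       # category by id
    ('fast shipping, great packaging', 'Positive'),                  # category by path
    ('refund took a month', ['Negative', 'Support'], 'reviews'),     # several paths + tag
    ({'title': 'broken on arrival', 'body': 'box was crushed'}, 3),  # multi-feature sample
]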
def check_batch_limits(self, text_list, batch_size):
    if batch_size > MAX_BATCH_SIZE or batch_size < MIN_BATCH_SIZE:
        raise MonkeyLearnException('batch_size has to be between {0} and {1}'.format(
            MIN_BATCH_SIZE, MAX_BATCH_SIZE))
    if not text_list:
        raise MonkeyLearnException('The text_list can\'t be empty.')
    if '' in text_list:
        raise MonkeyLearnException('You have an empty text in position {0} in text_list'.format(
            text_list.index('')))
    for i, text in enumerate(text_list):
        if not isinstance(text, six.string_types):
            raise MonkeyLearnException(
                'Element in position {0} in text_list must be a string.'.format(i)
            )
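# Inputs check_batch_limits rejects, for reference. It is called as a method; the
# instance is omitted here for brevity, and MIN_BATCH_SIZE / MAX_BATCH_SIZE /
# DEFAULT_BATCH_SIZE come from the SDK settings (exact values not shown here).
#   check_batch_limits(['some text'], MAX_BATCH_SIZE + 1)  -> batch_size out of range
#   check_batch_limits([], DEFAULT_BATCH_SIZE)             -> empty text_list
#   check_batch_limits(['ok', ''], DEFAULT_BATCH_SIZE)     -> empty text at position 1
#   check_batch_limits(['ok', 42], DEFAULT_BATCH_SIZE)     -> non-string element at position 1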
def upload_samples(self, module_id, samples_with_categories, sleep_if_throttled=True):
    url = self.endpoint + module_id + '/samples/'
    samples = []
    for i, s in enumerate(samples_with_categories):
        # category given by id(s), by path(s), or omitted entirely
        if (isinstance(s[1], int) or
                (isinstance(s[1], list) and all(isinstance(c, int) for c in s[1]))):
            sample_dict = {"text": s[0], "category_id": s[1]}
        elif (isinstance(s[1], six.string_types) or
                (isinstance(s[1], list) and all(isinstance(c, six.string_types) for c in s[1]))):
            sample_dict = {"text": s[0], "category_path": s[1]}
        elif s[1] is None:
            sample_dict = {"text": s[0]}
        else:
            raise MonkeyLearnException(
                'Invalid category value in sample ' + str(i))

        # optional third element: tag (string or list of strings)
        if (len(s) > 2 and s[2] and
                (isinstance(s[2], six.string_types) or
                 (isinstance(s[2], list) and all(isinstance(c, six.string_types) for c in s[2])))):
            sample_dict['tag'] = s[2]
        samples.append(sample_dict)

    data = {'samples': samples}
    response = self.make_request(url, 'POST', data, sleep_if_throttled)
    self.handle_errors(response)
    return MonkeyLearnResponse(response.json()['result'], [response])
def predict(self, module_id, sample_list=None, sandbox=False,
            batch_size=DEFAULT_BATCH_SIZE, sleep_if_throttled=True):
    try:
        sample_list = list(sample_list)
    except TypeError:
        raise MonkeyLearnException('The sample_list can\'t be None.')
    self.check_batch_limits(sample_list, batch_size)

    url = self.endpoint + module_id + '/predict/'
    url_params = {}
    if sandbox:
        url_params['sandbox'] = 1
    if url_params:
        url += '?{}'.format(urlencode(url_params))

    res = []
    responses = []
    for i in range(0, len(sample_list), batch_size):
        data = {
            'text_list': sample_list[i:i + batch_size]
        }
        response = self.make_request(url, 'POST', data, sleep_if_throttled)
        self.handle_errors(response)
        responses.append(response)
        res.extend(response.json()['result'])
    return MonkeyLearnResponse(res, responses)
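# A standalone sketch of the batching scheme used by predict() above: the sample
# list is sliced into consecutive chunks of at most batch_size items and each
# chunk becomes one POST payload. iter_batches is illustrative, not part of the SDK.
def iter_batches(sample_list, batch_size):
    for i in range(0, len(sample_list), batch_size):
        yield sample_list[i:i + batch_size]

assert list(iter_batches(['a', 'b', 'c', 'd', 'e'], 2)) == [['a', 'b'], ['c', 'd'], ['e']]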
def handle_errors(self, response):
    if not response.ok:
        try:
            res = response.json()
        except ValueError:
            response.raise_for_status()
        raise MonkeyLearnException(json.dumps(res['detail']))
def run(self, module_id, data, sandbox=False, sleep_if_throttled=True):
    if not isinstance(data, dict):
        raise MonkeyLearnException('The data parameter must be a dictionary')
    url = self.endpoint + module_id + '/run/'
    if sandbox:
        url += '?sandbox=1'
    response = self.make_request(url, 'POST', data, sleep_if_throttled)
    self.handle_errors(response)
    return MonkeyLearnResponse(response.json()['result'], [response])
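# Hypothetical call to run() above. 'pipeline_module' stands in for an instance of
# the class defining run(), and the module id and payload keys are placeholders:
# the actual schema of the data dict depends on how the pipeline is defined.
example_data = {'input': [{'text': 'I love the new dashboard'}]}
# response = pipeline_module.run('pi_example', example_data, sandbox=True)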
def classify(self, module_id, sample_list=None, sandbox=False,
             batch_size=DEFAULT_BATCH_SIZE, sleep_if_throttled=True, debug=False,
             text_list=None):
    if text_list:
        warnings.warn(
            "The text_list parameter will be deprecated in future versions. Please use sample_list."
        )
        sample_list = text_list

    try:
        sample_list = list(sample_list)
    except TypeError:
        raise MonkeyLearnException('The sample_list can\'t be None.')
    self.check_batch_limits(sample_list, batch_size)

    url = self.endpoint + module_id + '/classify/'
    url_params = {}
    if sandbox:
        url_params['sandbox'] = 1
    if debug:
        url_params['debug'] = 1
    if url_params:
        url += '?{}'.format(urlencode(url_params))

    res = []
    responses = []
    for i in range(0, len(sample_list), batch_size):
        # multi-feature samples are sent as 'sample_list', plain texts as 'text_list'
        if isinstance(sample_list[0], dict):
            data = {'sample_list': sample_list[i:i + batch_size]}
        else:
            data = {'text_list': sample_list[i:i + batch_size]}
        response = self.make_request(url, 'POST', data, sleep_if_throttled)
        self.handle_errors(response)
        responses.append(response)
        res.extend(response.json()['result'])
    return MonkeyLearnResponse(res, responses)
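# The two sample shapes classify() above accepts (placeholder values): plain strings
# are posted under 'text_list', while multi-feature dicts are posted under
# 'sample_list'. The dispatch looks only at the first element, so a single call
# should use one shape or the other, not a mix.
plain_samples = ['service was quick', 'never buying again']
feature_samples = [
    {'subject': 'refund', 'body': 'where is my money'},
    {'subject': 'thanks', 'body': 'issue resolved'},
]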
def upload_samples(self, module_id, samples_to_upload, sleep_if_throttled=True):
    url = self.endpoint + module_id + '/samples/'
    samples = []
    for i, s in enumerate(samples_to_upload):
        if isinstance(s[0], six.string_types):
            sample_dict = {"text": s[0]}
        else:
            raise MonkeyLearnException('The sample must be a text in sample ' + str(i))

        if (len(s) > 1 and s[1] and
                (isinstance(s[1], six.string_types) or
                 (isinstance(s[1], list) and all(isinstance(c, six.string_types) for c in s[1])))):
            sample_dict['tag'] = s[1]
        samples.append(sample_dict)

    data = {
        'samples': samples
    }
    response = self.make_request(url, 'POST', data, sleep_if_throttled)
    self.handle_errors(response)
    return MonkeyLearnResponse(response.json()['result'], [response])
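# Illustrative input for the tag-only upload_samples variant above (placeholder
# texts and tags): each tuple is (text,), (text, tag) or (text, [tags]).
example_samples_to_upload = [
    ('Acme Corp acquired Widget Inc for $2M',),
    ('Meeting with John in Paris on Friday', 'calendar'),
    ('Invoice #123 due on March 3', ['billing', 'deadlines']),
]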