コード例 #1
0
    def upload_samples(self, module_id, samples_with_categories, sleep_if_throttled=True,
                       features_schema=None):
        """POST a batch of samples to ``<endpoint><module_id>/samples/``.

        Every item of ``samples_with_categories`` is read positionally:

        * ``[0]`` -- the sample content. A dict is sent as ``"features"``, a
          string as ``"text"``; any other type is rejected.
        * ``[1]`` -- the category. An int (or list of ints) is sent as
          ``"category_id"``, a string (or list of strings) as
          ``"category_path"``; ``None`` sends no category at all.
        * ``[2]`` -- optional tag. Sent as ``"tag"`` only when it is truthy
          and is a string or a list of strings; otherwise it is left out.

        ``features_schema`` is added to the request body when truthy, and
        ``sleep_if_throttled`` is handed through to ``make_request``.
        ``handle_errors`` is run on the response before its result is read.

        Returns a ``MonkeyLearnResponse`` built from the ``'result'`` entry of
        the JSON response.

        Raises ``MonkeyLearnException`` for an unsupported content or category
        value; the message ends with the position of the offending sample.
        """
        url = self.endpoint + module_id + '/samples/'

        def one_or_list_of(value, kinds):
            # True for a single `kinds` instance or a list made only of them.
            if isinstance(value, kinds):
                return True
            return isinstance(value, list) and all(isinstance(item, kinds) for item in value)

        payload = []
        for position, sample in enumerate(samples_with_categories):
            content = sample[0]
            if isinstance(content, dict):
                # Multi-feature sample.
                entry = {"features": content}
            elif isinstance(content, six.string_types):
                entry = {"text": content}
            else:
                raise MonkeyLearnException('The sample must be a text in sample ' + str(position))

            category = sample[1]
            if one_or_list_of(category, int):
                entry["category_id"] = category
            elif one_or_list_of(category, six.string_types):
                entry["category_path"] = category
            elif category is not None:
                raise MonkeyLearnException('Invalid category value in sample ' + str(position))

            # A tag of the wrong type is dropped silently rather than rejected.
            if len(sample) > 2 and sample[2] and one_or_list_of(sample[2], six.string_types):
                entry['tag'] = sample[2]

            payload.append(entry)

        data = {'samples': payload}
        if features_schema:
            data['features_schema'] = features_schema

        response = self.make_request(url, 'POST', data, sleep_if_throttled)
        self.handle_errors(response)
        return MonkeyLearnResponse(response.json()['result'], [response])
コード例 #2
0
 def check_batch_limits(self, text_list, batch_size):
     """Validate ``batch_size`` and ``text_list``; return ``None`` when valid.

     Checks run in this order and the first failure raises
     ``MonkeyLearnException``:

     1. ``batch_size`` lies within ``MIN_BATCH_SIZE``..``MAX_BATCH_SIZE``.
     2. ``text_list`` is not empty.
     3. ``text_list`` contains no empty string (the first one's position is
        reported).
     4. every element of ``text_list`` is a string (the first offender's
        position is reported).
     """
     if batch_size < MIN_BATCH_SIZE or batch_size > MAX_BATCH_SIZE:
         template = 'batch_size has to be between {0} and {1}'
         raise MonkeyLearnException(template.format(MIN_BATCH_SIZE, MAX_BATCH_SIZE))

     if not text_list:
         raise MonkeyLearnException('The text_list can\'t be empty.')

     # Empty strings are reported before type errors, whatever their position.
     if '' in text_list:
         empty_at = text_list.index('')
         raise MonkeyLearnException(
             'You have an empty text in position {0} in text_list'.format(empty_at))

     non_string_at = next(
         (position for position, item in enumerate(text_list)
          if not isinstance(item, six.string_types)),
         None)
     if non_string_at is not None:
         raise MonkeyLearnException(
             'Element in position {0} in text_list must be a string.'.format(non_string_at))
コード例 #3
0
    def upload_samples(self,
                       module_id,
                       samples_with_categories,
                       sleep_if_throttled=True):
        """POST a batch of text samples to ``<endpoint><module_id>/samples/``.

        Every item of ``samples_with_categories`` is read positionally:

        * ``[0]`` -- the sample text, sent as ``"text"`` without validation.
        * ``[1]`` -- the category. An int (or list of ints) is sent as
          ``"category_id"``, a string (or list of strings) as
          ``"category_path"``; ``None`` sends the text alone.
        * ``[2]`` -- optional tag. Sent as ``"tag"`` only when it is truthy
          and is a string or a list of strings; otherwise it is left out.

        ``sleep_if_throttled`` is handed through to ``make_request`` and
        ``handle_errors`` is run on the response before its result is read.

        Returns a ``MonkeyLearnResponse`` built from the ``'result'`` entry of
        the JSON response.

        Raises ``MonkeyLearnException`` when a category is none of the accepted
        forms; the message ends with the position of the offending sample.
        """
        # `basestring` exists only on Python 2. Fall back to `str` so that the
        # string checks below do not raise NameError on Python 3.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        def one_or_list_of(value, kinds):
            # True for a single `kinds` instance or a list made only of them.
            if isinstance(value, kinds):
                return True
            return (isinstance(value, list)
                    and all(isinstance(item, kinds) for item in value))

        url = self.endpoint + module_id + '/samples/'
        samples = []
        for i, s in enumerate(samples_with_categories):
            category = s[1]
            if one_or_list_of(category, int):
                sample_dict = {"text": s[0], "category_id": category}
            elif one_or_list_of(category, string_types):
                sample_dict = {"text": s[0], "category_path": category}
            elif category is None:
                sample_dict = {"text": s[0]}
            else:
                raise MonkeyLearnException(
                    'Invalid category value in sample ' + str(i))

            # A tag of the wrong type is dropped silently rather than rejected.
            if len(s) > 2 and s[2] and one_or_list_of(s[2], string_types):
                sample_dict['tag'] = s[2]

            samples.append(sample_dict)

        data = {'samples': samples}
        response = self.make_request(url, 'POST', data, sleep_if_throttled)
        self.handle_errors(response)
        return MonkeyLearnResponse(response.json()['result'], [response])
コード例 #4
0
ファイル: clustering.py プロジェクト: kirankarpurapu/TweetMap
    def predict(self, module_id, sample_list=None, sandbox=False,
                batch_size=DEFAULT_BATCH_SIZE, sleep_if_throttled=True):
        """POST ``sample_list`` to ``<endpoint><module_id>/predict/`` in batches.

        ``sample_list`` is materialised with ``list()`` and validated with
        ``check_batch_limits``. It is then sent in consecutive slices of at
        most ``batch_size`` items, each as the ``'text_list'`` of one request.
        When ``sandbox`` is truthy, ``sandbox=1`` is appended to the URL as a
        query string. ``sleep_if_throttled`` is handed through to
        ``make_request`` and ``handle_errors`` is run on every response.

        Returns a ``MonkeyLearnResponse`` holding the concatenated ``'result'``
        entries of all batches together with the list of raw responses.

        Raises ``MonkeyLearnException`` when ``sample_list`` cannot be turned
        into a list (for example when it is ``None``).
        """
        try:
            samples = list(sample_list)
        except TypeError:
            raise MonkeyLearnException('The sample_list can\'t be None.')
        self.check_batch_limits(samples, batch_size)

        query = {'sandbox': 1} if sandbox else {}
        url = self.endpoint + module_id + '/predict/'
        if query:
            url += '?{}'.format(urlencode(query))

        results, responses = [], []
        for offset in range(0, len(samples), batch_size):
            batch = samples[offset:offset + batch_size]
            response = self.make_request(url, 'POST', {'text_list': batch}, sleep_if_throttled)
            self.handle_errors(response)
            responses.append(response)
            results.extend(response.json()['result'])

        return MonkeyLearnResponse(results, responses)
コード例 #5
0
 def handle_errors(self, response):
     """Raise if ``response`` reports a failure; return ``None`` otherwise.

     For a non-ok response whose body parses as JSON, a
     ``MonkeyLearnException`` is raised carrying the JSON-encoded ``'detail'``
     entry. When the body has no ``'detail'`` entry, or is not a JSON object,
     the whole parsed body is reported instead, so callers still receive a
     ``MonkeyLearnException`` rather than a ``KeyError``/``TypeError``.

     For a non-ok response whose body is not JSON, whatever
     ``response.raise_for_status()`` raises is propagated.
     """
     if response.ok:
         return

     try:
         body = response.json()
     except ValueError:
         response.raise_for_status()
         # Only reached if raise_for_status() returned without raising; fail
         # explicitly instead of continuing with no parsed body.
         raise MonkeyLearnException(
             'The API returned an error response that is not valid JSON.')

     if isinstance(body, dict) and 'detail' in body:
         detail = body['detail']
     else:
         detail = body
     raise MonkeyLearnException(json.dumps(detail))
コード例 #6
0
ファイル: pipelines.py プロジェクト: kirankarpurapu/TweetMap
    def run(self, module_id, data, sandbox=False, sleep_if_throttled=True):
        """POST ``data`` to ``<endpoint><module_id>/run/``.

        ``data`` must be a dict and is sent as the request body unchanged.
        When ``sandbox`` is truthy, ``?sandbox=1`` is appended to the URL.
        ``sleep_if_throttled`` is handed through to ``make_request`` and
        ``handle_errors`` is run on the response before its result is read.

        Returns a ``MonkeyLearnResponse`` built from the ``'result'`` entry of
        the JSON response.

        Raises ``MonkeyLearnException`` when ``data`` is not a dict.
        """
        if not isinstance(data, dict):
            raise MonkeyLearnException('The data parameter must be a dictionary')

        query = '?sandbox=1' if sandbox else ''
        url = self.endpoint + module_id + '/run/' + query

        response = self.make_request(url, 'POST', data, sleep_if_throttled)
        self.handle_errors(response)
        result = response.json()['result']
        return MonkeyLearnResponse(result, [response])
コード例 #7
0
    def classify(self,
                 module_id,
                 sample_list=None,
                 sandbox=False,
                 batch_size=DEFAULT_BATCH_SIZE,
                 sleep_if_throttled=True,
                 debug=False,
                 text_list=None):
        """POST ``sample_list`` to ``<endpoint><module_id>/classify/`` in batches.

        A truthy ``text_list`` emits a warning and replaces ``sample_list``.
        The samples are materialised with ``list()``, validated with
        ``check_batch_limits`` and sent in consecutive slices of at most
        ``batch_size`` items. Each slice is sent under the ``'sample_list'``
        key when the first sample is a dict (multi-feature input), and under
        ``'text_list'`` otherwise.

        ``sandbox`` and ``debug``, when truthy, add ``sandbox=1`` and
        ``debug=1`` to the URL query string. ``sleep_if_throttled`` is handed
        through to ``make_request`` and ``handle_errors`` is run on every
        response.

        Returns a ``MonkeyLearnResponse`` holding the concatenated ``'result'``
        entries of all batches together with the list of raw responses.

        Raises ``MonkeyLearnException`` when the samples cannot be turned into
        a list (for example when they are ``None``).
        """
        if text_list:
            warnings.warn(
                "The text_list parameter will be deprecated in future versions. Please use sample_list."
            )
            sample_list = text_list

        try:
            samples = list(sample_list)
        except TypeError:
            raise MonkeyLearnException('The sample_list can\'t be None.')
        self.check_batch_limits(samples, batch_size)

        url = self.endpoint + module_id + '/classify/'
        switches = (('sandbox', sandbox), ('debug', debug))
        url_params = {name: 1 for name, enabled in switches if enabled}
        if url_params:
            url += '?{}'.format(urlencode(url_params))

        # The first sample decides the payload key for every batch; the
        # emptiness guard keeps an empty list from being indexed.
        is_multi_feature = bool(samples) and isinstance(samples[0], dict)
        payload_key = 'sample_list' if is_multi_feature else 'text_list'

        results, responses = [], []
        for offset in range(0, len(samples), batch_size):
            data = {payload_key: samples[offset:offset + batch_size]}
            response = self.make_request(url, 'POST', data, sleep_if_throttled)
            self.handle_errors(response)
            responses.append(response)
            results.extend(response.json()['result'])

        return MonkeyLearnResponse(results, responses)
コード例 #8
0
ファイル: clustering.py プロジェクト: kirankarpurapu/TweetMap
    def upload_samples(self, module_id, samples_to_upload, sleep_if_throttled=True):
        """POST a batch of text samples to ``<endpoint><module_id>/samples/``.

        Every item of ``samples_to_upload`` is read positionally:

        * ``[0]`` -- the sample text; it must be a string and is sent as
          ``"text"``.
        * ``[1]`` -- optional tag. Sent as ``"tag"`` only when it is truthy
          and is a string or a list of strings; otherwise it is left out.

        ``sleep_if_throttled`` is handed through to ``make_request`` and
        ``handle_errors`` is run on the response before its result is read.

        Returns a ``MonkeyLearnResponse`` built from the ``'result'`` entry of
        the JSON response.

        Raises ``MonkeyLearnException`` when a sample's first element is not a
        string; the message ends with the position of the offending sample.
        """
        url = self.endpoint + module_id + '/samples/'

        def build_entry(position, sample):
            # Turn one positional sample into the dict sent to the API.
            text = sample[0]
            if not isinstance(text, six.string_types):
                raise MonkeyLearnException('The sample must be a text in sample ' + str(position))
            entry = {"text": text}

            if len(sample) > 1:
                tag = sample[1]
                tag_is_valid = isinstance(tag, six.string_types) or (
                    isinstance(tag, list)
                    and all(isinstance(item, six.string_types) for item in tag))
                # A tag of the wrong type is dropped silently, not rejected.
                if tag and tag_is_valid:
                    entry['tag'] = tag
            return entry

        samples = [build_entry(position, sample)
                   for position, sample in enumerate(samples_to_upload)]

        response = self.make_request(url, 'POST', {'samples': samples}, sleep_if_throttled)
        self.handle_errors(response)
        return MonkeyLearnResponse(response.json()['result'], [response])