def classify_files():
    """
    Handle file-classification requests.

    The POST method allows API clients to use their model(s).
    The GET method allows API clients to get a list of available models.

    On POST the uploaded file (form field ``file``) is checked against
    ``ACCEPTED_MIMETYPES``, validated, read with ``sh.get_spreadsheet_rows``
    and classified with the ``IDEANETS`` entry named by the ``model`` key of
    the JSON request body. The outcome is stored in the local ``data`` dict:
    ``{"status": "success", "data": predictions}`` on success, or
    ``{"status": "failure", "data": {}}`` if any step raises.

    NOTE(review): as written, ``data`` is never returned and the GET branch
    described above is not implemented here -- confirm whether that is
    intentional.
    NOTE(review): a second ``classify_files`` definition follows this one in
    the file and rebinds the name -- confirm which one is meant to stay.

    :return:
    """
    if request.method == "POST":
        # Log that we got a request
        app.logger.error("Got a file POST request")

        # Extract and validate credentials. The returned pair is not used
        # below; the unpacking is kept so the call behaves exactly as before.
        app_id, app_key = check_credentials()

        try:
            submitted_file = request.files.get('file')
            ctype = submitted_file.content_type
            if ctype not in ACCEPTED_MIMETYPES:
                app.logger.error("Unsupported Media Type: %s", ctype)
                # NOTE(review): this is raised inside the try block, so it is
                # caught by the handler below and reported as a generic
                # failure -- confirm that is the intended client response.
                raise UnsupportedMediaType

            data = json.loads(request.data)
            validate_input_file(submitted_file, ctype)

            # 'textcol' is required: a missing key now fails here with a clear
            # KeyError instead of an accidental UnboundLocalError further down.
            textcol = data['textcol']
            _header, rows = sh.get_spreadsheet_rows(submitted_file, textcol, dedupe=True)

            model = data['model']
            ideanet = IDEANETS[model]
            predictions = ideanet.classify(rows)
            data = {"status": "success", "data": predictions}
        except Exception:
            # Catch Exception rather than everything so that SystemExit and
            # KeyboardInterrupt still propagate, and record the cause instead
            # of discarding it.
            app.logger.exception("File classification request failed")
            data = {"status": "failure", "data": {}}
def classify_files():
    """
    Handle file-classification requests.

    The POST method allows API clients to use their model(s).
    The GET method allows API clients to get a list of available models.

    On POST the uploaded file (form field ``file``) is checked against
    ``ACCEPTED_MIMETYPES``, validated, read with ``sh.get_spreadsheet_rows``
    and classified with the ``IDEANETS`` entry named by the ``model`` key of
    the JSON request body. The outcome is stored in the local ``data`` dict:
    ``{"status": "success", "data": predictions}`` on success, or
    ``{"status": "failure", "data": {}}`` if any step raises.

    NOTE(review): as written, ``data`` is never returned and the GET branch
    described above is not implemented here -- confirm whether that is
    intentional.
    NOTE(review): another ``classify_files`` definition precedes this one in
    the file; this later one rebinds the name -- confirm which is meant to
    stay.

    :return:
    """
    if request.method == "POST":
        # Log that we got a request
        app.logger.error("Got a file POST request")

        # Extract and validate credentials. The returned pair is not used
        # below; the unpacking is kept so the call behaves exactly as before.
        app_id, app_key = check_credentials()

        try:
            submitted_file = request.files.get('file')
            ctype = submitted_file.content_type
            if ctype not in ACCEPTED_MIMETYPES:
                app.logger.error("Unsupported Media Type: %s", ctype)
                # NOTE(review): this is raised inside the try block, so it is
                # caught by the handler below and reported as a generic
                # failure -- confirm that is the intended client response.
                raise UnsupportedMediaType

            data = json.loads(request.data)
            validate_input_file(submitted_file, ctype)

            # 'textcol' is required: a missing key now fails here with a clear
            # KeyError instead of an accidental UnboundLocalError further down.
            textcol = data['textcol']
            _header, rows = sh.get_spreadsheet_rows(submitted_file, textcol, dedupe=True)

            model = data['model']
            ideanet = IDEANETS[model]
            predictions = ideanet.classify(rows)
            data = {"status": "success", "data": predictions}
        except Exception:
            # Catch Exception rather than everything so that SystemExit and
            # KeyboardInterrupt still propagate, and record the cause instead
            # of discarding it.
            app.logger.exception("File classification request failed")
            data = {"status": "failure", "data": {}}
def preprocess(self):
    """
    Build the training, validation and test sets from the raw rows.

    Rows are taken from ``self._raw_rows`` when it is set; otherwise they are
    read with ``sh.get_spreadsheet_rows`` from ``self._data_file`` inside
    ``self._data_directory``. The text and label columns are converted to
    strings, ``self._DICTIONARY`` is built from the sentences, per-class
    indices are selected (a dedicated path when ``self._class_type`` is
    ``"Sentiment"``, a generic path otherwise), and the resulting splits are
    stored on ``self.train_set``, ``self.valid_set`` and ``self.test_set`` as
    ``(x, y)`` tuples.

    :return: ``self``
    """
    # For Synapsify Core output, the comments are in the first column
    # and the sentiment is in the 6th column
    #
    # Identity test on purpose: ``== None`` is evaluated element-wise when the
    # raw rows are a numpy array, which makes the ``if`` itself raise.
    if self._raw_rows is None:
        source_path = os.path.join(self._data_directory, self._data_file)
        _header, rows = sh.get_spreadsheet_rows(
            source_path, self._text_col, dedupe=self._dedupe)
    else:
        rows = self._raw_rows

    sentences = [str(row[self._text_col]) for row in rows]
    classes = [str(row[self._label_col]) for row in rows]

    self._DICTIONARY = self._build_dict(sentences)

    if self._class_type == "Sentiment":
        # Grab the indices for the Core sentiment; the test draw is given the
        # training indices as its third argument.
        self._train_xx = self._get_rand_sentiment_indices(classes, self._train_size, [])
        self._test_xx = self._get_rand_sentiment_indices(
            classes, self._test_size, self._train_xx)
        self._trXX = self._get_sentiment_indices(
            [rows[r] for r in self._train_xx], self._label_col, [])
        self._teXX = self._get_sentiment_indices(
            [rows[r] for r in self._test_xx], self._label_col, self._train_xx)

        # Munge training and test sets for the classes provided
        train = self._munge_class_freqs(sentences, [self._trXX['neg'], self._trXX['pos']])
        test = self._munge_class_freqs(sentences, [self._teXX['neg'], self._teXX['pos']])
    else:
        # self._trXX / self._teXX are read below without being assigned on
        # this path, so _get_rand_indices is expected to populate them.
        self._get_rand_indices(classes)

        # Munge training and test sets for the classes provided.
        # Class keys are ordered numerically (not lexically) before lookup.
        train_labels = np.unique([int(key) for key in self._trXX.keys()]).tolist()
        train_classes = [self._trXX[str(label)] for label in train_labels]

        test_labels = np.unique([int(key) for key in self._teXX.keys()]).tolist()
        test_classes = [self._teXX[str(label)] for label in test_labels]

        train = self._munge_class_freqs(sentences, train_classes)
        test = self._munge_class_freqs(sentences, test_classes)

    # Split training into a validation set per the model parameter
    valid_set_x, valid_set_y, train_set_x, train_set_y = self._split_train_w_valid_set(train)

    # Remove unknown words
    train_set_x = self._remove_unk(train_set_x)
    valid_set_x = self._remove_unk(valid_set_x)
    test_set_x = self._remove_unk(test[0])

    self.train_set = (train_set_x, train_set_y)
    self.valid_set = (valid_set_x, valid_set_y)
    self.test_set = (test_set_x, test[1])

    return self

# if __name__ == '__main__':
#     directory = sys.argv[1]
#     filename = sys.argv[2]
#     textcol = 0
#     if len(sys.argv)>2: textcol = int(sys.argv[3])
#     sentcol = 5
#     if len(sys.argv)>3: sentcol = int(sys.argv[4])
#
#     ### I added the following part of codes here
#     train_size = int(sys.argv[5])
#     test_size = int(sys.argv[6])
#     ### The main function should take six parameters instead of four
#     ### The original code is:
#     ### main(directory, filename, textcol, sentcol)
#     load(directory, filename, textcol, sentcol, train_size, test_size)