import joblib
import numpy as np

# `preprocess` and `get_features` are project helpers defined elsewhere
# (text cleaning and feature extraction, respectively).


def predict(message):
    # Load the serialized classifier from disk.
    filename = 'finalized_model.sav'
    loaded_model = joblib.load(filename)

    # Pre-process the message and convert it into a dense feature matrix.
    message = preprocess(message)
    y = get_features(message)
    y = y.toarray()

    # The trained SVM/AdaBoost model expects exactly 57 features, so drop
    # any extra columns from the end of the feature matrix.
    # (>= so that an input with exactly 57 features is also accepted.)
    k = y.shape[1] - 1
    if k >= 56:
        while k != 56:
            y = np.delete(y, k, 1)
            k -= 1
    else:
        return "Text too small to obtain features, therefore not spam"

    result_array = loaded_model.predict(y)
    if result_array[-1] == 0:
        return "Not Spam"
    else:
        return "Spam"
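# A minimal usage sketch for predict() above, assuming the saved model file
# and the `preprocess`/`get_features` helpers are available; the sample
# message is made up.
if __name__ == '__main__':
    sample = "Congratulations! You have won a free prize, claim it now!!!"
    print(predict(sample))  # prints "Spam" or "Not Spam"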
import torch
import torch.nn as nn
import torch.optim as optim

# `Config`, `GCN`, `depart`, and `evaluate` are defined elsewhere in the
# project; a sketch of `depart` and `evaluate` follows this function.


def main():
    from pre_process import preprocess

    # Load node features, the normalized adjacency matrix, and labels.
    feature, a_hat, labels = preprocess()
    print("loaded")

    # Split the nodes into a training set and a held-out set.
    selected, unselected = depart(len(labels), 1 - Config.test_ratio)
    labels_selected = labels[selected]
    labels_unselected = labels[unselected]

    feature = torch.from_numpy(feature).float().cuda()
    tensor_selected = torch.tensor(labels_selected).long().cuda()
    a_hat = torch.tensor(a_hat).float().cuda()

    net = GCN(a_hat, feature.shape[1], Config.num_classes,
              Config.hidden_size, Config.n_hidden_layer).cuda()
    print(net)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=Config.lr)

    net.train()
    for e in range(Config.num_epochs):
        optimizer.zero_grad()
        output = net(feature)
        # The loss is computed on the training nodes only.
        loss = criterion(output[selected], tensor_selected)
        loss.backward()
        optimizer.step()

        trained_accuracy = evaluate(output[selected], labels_selected)
        untrained_accuracy = evaluate(output[unselected], labels_unselected)
        print("[Epoch %d]: trained acc: %.7f, untrained acc: %.7f, loss: %.7f"
              % (e, trained_accuracy, untrained_accuracy,
                 loss.detach().cpu().numpy()))
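# `depart` and `evaluate` are not shown above; this is a minimal sketch of
# what they might look like, assuming `depart` draws a random train/test
# index split and `evaluate` scores argmax predictions against the labels.
import numpy as np


def depart(n, train_ratio):
    # Shuffle the node indices and split them at the train_ratio cut-off.
    indices = np.random.permutation(n)
    cut = int(n * train_ratio)
    return indices[:cut], indices[cut:]


def evaluate(output, labels):
    # Fraction of nodes whose argmax class matches the ground-truth label.
    predictions = output.argmax(dim=1).cpu().numpy()
    return (predictions == labels).mean()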
def tokenize(self, text):
    """Tokenizes a piece of text."""
    text = convert_to_unicode(text)
    text = preprocess(text, True)
    text = self._clean_text(text)

    # This was added on November 1st, 2018 for the multilingual and Chinese
    # models. This is also applied to the English models now, but it doesn't
    # matter since the English models were not trained on any Chinese data
    # and generally don't have any Chinese data in them (there are Chinese
    # characters in the vocabulary because Wikipedia does have some Chinese
    # words in the English Wikipedia.).
    text = self._tokenize_chinese_chars(text)

    orig_tokens = whitespace_tokenize(text)
    split_tokens = []
    for token in orig_tokens:
        if self.do_lower_case:
            token = token.lower()
            token = self._run_strip_accents(token)
        split_tokens.extend(self._run_split_on_punc(token))

    output_tokens = whitespace_tokenize(" ".join(split_tokens))
    return output_tokens
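# A usage sketch for the method above, assuming it belongs to a
# BasicTokenizer-style class as in the original BERT code; the class name
# and constructor argument are illustrative.
tokenizer = BasicTokenizer(do_lower_case=True)
print(tokenizer.tokenize(u"Hello, World!"))
# -> something like ['hello', ',', 'world', '!']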
"dims": [1, 1, 128, 128], "data_type": "TYPE_FP32" }, { "name": "bbox_wh", "dims": [1, 2, 128, 128], "data_type": "TYPE_FP32" }, { "name": "center_shift", "dims": [1, 2, 128, 128], "data_type": "TYPE_FP32" }] from pre_process import preprocess import tensorflow as tf sess = tf.Session() preprocess_fn = lambda x: sess.run(preprocess([x]))[0] stream = trt_backend.ImageBatchStream("./calibrator_files", 5, preprocess_fn) int8_calibrator = trt_backend.IInt8EntropyCalibrator2(inputs_def, stream) trt_backend.torch2trt(computation_graph=model, graph_name="detection-network", model_file="./network/dla34.pth", inputs_def=inputs_def, outputs_def=outputs_def, instances=16, gpus=[0, 1, 2, 3], version=1, export_path="../../model_repository", int8_calibrator=int8_calibrator)
import json
import string

from nltk.corpus import stopwords

# `preprocess` is the project's Tweet tokenizer, defined elsewhere.


def generate_term_list(filename, term_filter):
    '''
    Generate a list of all the terms that appear in a .json file

    **Parameters**

        filename: *str*
            The name of the .json file to be input.
        term_filter: *str*
            The type of filter to be used for parsing through all the words
            in the Tweets. There are six different filters that can be used:

                default - considers all of the terms
                remove_stop_words - does not consider stop-words
                hashtags - only considers hashtags and no other terms
                terms_only - does not consider hashtags or mentions
                single_terms - only counts terms once in a Tweet
                single_stop_words - only counts terms once and does not
                    consider stop-words

    **Returns**

        terms: *list, str*
            A list containing one list of terms for each Tweet in the
            .json file.
    '''
    # Define list of common characters used for punctuation
    punctuation = list(string.punctuation)
    # Define list of stop-words, which are common words that do not carry
    # significance (conjunctions, adverbs, etc.)
    stop = stopwords.words('english') + punctuation + \
        ["rt", "via", "…", "’", "“", "”", "‘",
         "1", "2", "3", "4", "5", "6", "7", "8", "9", "0"]
    terms = []
    # Open the file
    with open(filename, 'r') as f:
        # Parse through each line/Tweet in the .json file
        for line in f:
            tweet = json.loads(line)
            # Pre-process the text of the Tweet into a list of tokens
            ppterms = preprocess(tweet['text'])
            # Apply the appropriate filter as specified by the user
            if term_filter == "remove_stop_words":
                terms.append([term for term in ppterms if term not in stop])
            elif term_filter == "hashtags":
                terms.append(
                    [term for term in ppterms if term.startswith('#')])
            elif term_filter == "terms_only":
                terms.append([
                    term for term in ppterms
                    if term not in stop and not term.startswith(('#', '@'))
                ])
            elif term_filter == "single_terms":
                terms.append(list(set(ppterms)))
            elif term_filter == "single_stop_words":
                temp = [term for term in ppterms if term not in stop]
                terms.append(list(set(temp)))
            elif term_filter == "default" or term_filter is None:
                terms.append(list(ppterms))
            else:
                raise Exception("Invalid filter type.")
    return terms
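# A usage sketch for generate_term_list(). The filename and the Counter
# aggregation are illustrative; they assume one JSON-encoded Tweet per line.
from collections import Counter

term_lists = generate_term_list('tweets.json', 'remove_stop_words')
counts = Counter(term for tweet_terms in term_lists for term in tweet_terms)
print(counts.most_common(10))  # ten most frequent non-stop-word terms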
def OnStart(self, event):
    # Launch the main program: run pre-processing, then show the results
    # window.
    pp.preprocess()
    frame = Result(parent=None, id=-1)
    frame.Show()
parser.add_argument("-p2", "--population2", type=int, help="population of the GA for the full-route") parser.add_argument("-m2", "--mutationrate2", type=int, help="mutation rate of the GA for the full-route") parser.add_argument("-g2", "--generation2", type=int, help="generation to run for the GA for the full-route") args = parser.parse_args() nodearray = preprocess(args.filename) p1 = 30 m1 = 0.1 g1 = 20 n = int(np.sqrt(len(nodearray) / 2.72015)) p2 = 200 m2 = 0.5 g2 = 20 if args.population1: p1 = args.population1 if args.mutationrate1: m1 = args.mutationrate1