# Reconstructed from a whitespace-collapsed line: collect candidate panel
# base URLs, load the trained classifier, and probe which panels are
# actually reachable before fanning out the real requests.
#
# NOTE(review): in the mangled source the `else:` most plausibly paired with
# a lost `if` that chose between "read URLs from stdin" mode and "single URL
# from the command line" mode.  Written literally it is a for/else, whose
# suite always runs because the loop contains no `break` -- confirm the
# intended guard against version control.
# NOTE(review): `stdin`, `stderr`, `args`, `base_urls`, `base_url`,
# `reformat_url`, `Pool`, `ClassificationEngine` and `get_result_wrapper`
# are defined elsewhere in the file / imports outside this chunk.
for line in stdin:
    line = line.strip()
    if len(line) == 0:
        continue
    # Normalise and dedupe each URL read from stdin.
    line = reformat_url(line)
    if line not in base_urls:
        base_urls.append(line)
else:
    # it's probably a url...
    base_url = reformat_url(base_url)
    base_urls = [base_url]

# Load the trained model and find out which request offsets it needs.
model_path = args.model
classifier = ClassificationEngine.load_model(model_path)
pool = Pool(size=16)  # 16 concurrent workers for the reachability probe
offsets = classifier.get_required_requests()

# Probe every candidate once (empty offset); a non-None base_url in the
# result tuple means the panel answered.
results = {}
stderr.write("Identifying panels we can actually reach\n")
for base_url, r1, r2 in pool.imap_unordered(get_result_wrapper, [(i, "") for i in base_urls]):
    if base_url is not None:
        stderr.write("We can reach {0}\n".format(base_url))
        results[base_url] = {}

# Fan out: one pending request per (reachable panel, required offset) pair.
requests_to_make = [(base_url, offset) for offset in offsets for base_url in results]
# NOTE(review): whitespace-collapsed duplicate of the chunk on the previous
# line (stdin URL collection -> model load -> reachability probe).  This copy
# is truncated: it ends at the header of the inner `for base_url in
# results.keys():` loop with the `requests_to_make.append(...)` body missing,
# so it is not restorable on its own -- reconcile with the complete copy and
# remove one of them.  Read literally, everything after the inline
# `# its probably a url...` marker is swallowed by that comment.
for line in stdin: line = line.strip() if len(line) == 0: continue line = reformat_url(line) if line not in base_urls: base_urls.append(line) else: # its probably a url... base_url = reformat_url(base_url) base_urls = [base_url] model_path = args.model classifier = ClassificationEngine.load_model(model_path) pool = Pool(size=16) offsets = classifier.get_required_requests() results = {} stderr.write("Identifying panels we can actually reach\n") for base_url, r1, r2 in pool.imap_unordered(get_result_wrapper, [(i, "") for i in base_urls]): if base_url is not None: stderr.write("We can reach {0}\n".format(base_url)) results[base_url] = {} requests_to_make = [] for offset in offsets: for base_url in results.keys():
# NOTE(review): whitespace-collapsed duplicate of the chunk on the next line
# (misclassification report -> record decision tree -> build sparse feature
# list -> construct and save ClassificationEngine).  This copy is truncated
# at the start: it begins mid-expression (`index], "is actually", ...`) with
# the opening `for index in xrange(...)` loop header missing, so it is not
# restorable on its own -- reconcile with the complete copy and remove one
# of them.
index], "is actually", tlabels[index] print "" relevant_features = [(i, 0, raw_features[i]) for i in clf.features_used] print len(relevant_features), "features used in this decision tree" for rf in relevant_features: print rf print "" decision_trees[label].append({ "model": clf, "features": relevant_features }) sparse_features = [] features_added = set() for label in decision_trees.keys(): for model in decision_trees[label]: for feature in model["features"]: if feature[0] not in features_added: features_added.add(feature[0]) sparse_features.append((feature[0], feature[2])) ce = ClassificationEngine(decision_trees, sparse_features, len(raw_features)) print ce.get_required_requests() ce.save_model("bot_model.mdl") #for index in xrange(10): # print ce.get_label_scores(None, vector=vectors[index, :])[0], original_labels[index]
for index in xrange(vectors.shape[0]): if predictions[index] != tlabels[index]: print names[index], "detected as", predictions[index], "is actually", tlabels[index] print "" relevant_features = [(i, 0, raw_features[i]) for i in clf.features_used] print len(relevant_features), "features used in this decision tree" for rf in relevant_features: print rf print "" decision_trees[label].append( {"model": clf, "features": relevant_features} ) sparse_features = [] features_added = set() for label in decision_trees.keys(): for model in decision_trees[label]: for feature in model["features"]: if feature[0] not in features_added: features_added.add(feature[0]) sparse_features.append((feature[0], feature[2])) ce = ClassificationEngine(decision_trees, sparse_features, len(raw_features)) print ce.get_required_requests() ce.save_model("bot_model.mdl") #for index in xrange(10): # print ce.get_label_scores(None, vector=vectors[index, :])[0], original_labels[index]