Example #1
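        # Read panel URLs from stdin, normalising each one and skipping blanks and duplicates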
        for line in stdin:
            line = line.strip()
            if len(line) == 0:
                continue
            line = reformat_url(line)
            if line not in base_urls:
                base_urls.append(line)

    else:
        # it's probably a URL...
        base_url = reformat_url(base_url)
        base_urls = [base_url]

    model_path = args.model

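    # Load the trained model and set up a pool of 16 concurrent workers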
    classifier = ClassificationEngine.load_model(model_path)
    pool = Pool(size=16)

    offsets = classifier.get_required_requests()
    results = {}

    stderr.write("Identifying panels we can actually reach\n")
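    # Unreachable panels come back with base_url set to None, so only live ones are recorded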
    for base_url, r1, r2 in pool.imap_unordered(get_result_wrapper, [(i, "") for i in base_urls]):
        if base_url is not None:
            stderr.write("We can reach {0}\n".format(base_url))
            results[base_url] = {}

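    # Build the full work list: one request per (reachable panel, required offset) pair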
    requests_to_make = []
    for offset in offsets:
        for base_url in results.keys():
            requests_to_make.append((base_url, offset))
Example #2
        for line in stdin:
            line = line.strip()
            if len(line) == 0:
                continue
            line = reformat_url(line)
            if line not in base_urls:
                base_urls.append(line)

    else:
        # it's probably a URL...
        base_url = reformat_url(base_url)
        base_urls = [base_url]

    model_path = args.model

    classifier = ClassificationEngine.load_model(model_path)
    pool = Pool(size=16)

    offsets = classifier.get_required_requests()
    results = {}

    stderr.write("Identifying panels we can actually reach\n")
    for base_url, r1, r2 in pool.imap_unordered(get_result_wrapper,
                                                [(i, "") for i in base_urls]):
        if base_url is not None:
            stderr.write("We can reach {0}\n".format(base_url))
            results[base_url] = {}

    requests_to_make = []
    for offset in offsets:
        for base_url in results.keys():
            requests_to_make.append((base_url, offset))
Example #3
            for index in xrange(vectors.shape[0]):
                if predictions[index] != tlabels[index]:
                    print names[index], "detected as", predictions[
                        index], "is actually", tlabels[index]

            print ""
            relevant_features = [(i, 0, raw_features[i])
                                 for i in clf.features_used]
            print len(relevant_features), "features used in this decision tree"
            for rf in relevant_features:
                print rf

            print ""

            decision_trees[label].append({
                "model": clf,
                "features": relevant_features
            })

    sparse_features = []
    features_added = set()
    for label in decision_trees.keys():
        for model in decision_trees[label]:
            for feature in model["features"]:
                if feature[0] not in features_added:
                    features_added.add(feature[0])
                    sparse_features.append((feature[0], feature[2]))

    ce = ClassificationEngine(decision_trees, sparse_features,
                              len(raw_features))
    print ce.get_required_requests()
    ce.save_model("bot_model.mdl")
    #for index in xrange(10):
    #    print ce.get_label_scores(None, vector=vectors[index, :])[0], original_labels[index]
Example #4
            for index in xrange(vectors.shape[0]):
                if predictions[index] != tlabels[index]:
                    print names[index], "detected as", predictions[index], "is actually", tlabels[index]

            print ""
            relevant_features = [(i, 0, raw_features[i]) for i in
                                 clf.features_used]
            print len(relevant_features), "features used in this decision tree"
            for rf in relevant_features:
                print rf

            print ""

            decision_trees[label].append(
                {"model": clf, "features": relevant_features}
            )

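    # Collect each feature index used by any tree exactly once, paired with its raw feature definition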
    sparse_features = []
    features_added = set()
    for label in decision_trees.keys():
        for model in decision_trees[label]:
            for feature in model["features"]:
                if feature[0] not in features_added:
                    features_added.add(feature[0])
                    sparse_features.append((feature[0], feature[2]))

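    # Bundle the per-label trees and the sparse feature map into an engine, then save it to disk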
    ce = ClassificationEngine(decision_trees, sparse_features, len(raw_features))
    print ce.get_required_requests()
    ce.save_model("bot_model.mdl")
    #for index in xrange(10):
    #    print ce.get_label_scores(None, vector=vectors[index, :])[0], original_labels[index]