def train_model(nodes, datasets):
    """Fit a LabelSpreading model on seed nodes plus classes found in datasets.

    Each of the initial ``nodes`` is given its own integer label (its
    position in the sequence).  Every class object collected from
    ``datasets`` is appended afterwards with the label ``-1``, which is the
    value scikit-learn's semi-supervised estimators treat as "unlabeled".

    Args:
        nodes: Iterable of seed objects.  Each must expose a
            ``concept_vector`` attribute that ``dict()`` accepts.
        datasets: Iterable of items passed one by one to ``compute_dataset``.
            The result of each call must expose a ``classes`` iterable whose
            items also carry ``concept_vector``.

    Returns:
        The fitted ``LabelSpreading`` instance, with the fitted
        ``DictVectorizer`` attached as ``model.vectorizer`` so callers can
        transform new samples into the same feature space.
    """
    # Materialize first so len() works for any iterable and so the number of
    # labelled samples is fixed before unlabelled ones are appended.
    nodes = list(nodes)
    n_labeled = len(nodes)
    vectorizer = DictVectorizer(sparse=True)
    for i, dataset in enumerate(datasets):
        g = compute_dataset(dataset)
        nodes.extend(g.classes)
        # Carriage return rewrites the progress counter on a single line.
        sys.stdout.write('\r' + str(i + 1))
        sys.stdout.flush()
    X = vectorizer.fit_transform([dict(node.concept_vector) for node in nodes])
    # Concatenate explicitly: `ndarray + list` would broadcast-add the -1s
    # onto the labels (or raise on a shape mismatch) instead of appending.
    y = np.concatenate([
        np.arange(n_labeled, dtype=int),
        np.full(len(nodes) - n_labeled, -1, dtype=int),
    ])

    model = LabelSpreading()
    model.fit(X, y)
    model.vectorizer = vectorizer
    return model
예제 #2
0
def train_model(nodes, datasets):
    """Fit a LabelSpreading model on seed nodes plus classes found in datasets.

    Each of the initial ``nodes`` is given its own integer label (its
    position in the sequence).  Every class object collected from
    ``datasets`` is appended afterwards with the label ``-1``, which is the
    value scikit-learn's semi-supervised estimators treat as "unlabeled".

    Args:
        nodes: Iterable of seed objects.  Each must expose a
            ``concept_vector`` attribute that ``dict()`` accepts.
        datasets: Iterable of items passed one by one to ``compute_dataset``.
            The result of each call must expose a ``classes`` iterable whose
            items also carry ``concept_vector``.

    Returns:
        The fitted ``LabelSpreading`` instance, with the fitted
        ``DictVectorizer`` attached as ``model.vectorizer`` so callers can
        transform new samples into the same feature space.
    """
    # Materialize first so len() works for any iterable and so the number of
    # labelled samples is fixed before unlabelled ones are appended.
    nodes = list(nodes)
    n_labeled = len(nodes)
    vectorizer = DictVectorizer(sparse=True)
    for i, dataset in enumerate(datasets):
        g = compute_dataset(dataset)
        nodes.extend(g.classes)
        # Carriage return rewrites the progress counter on a single line.
        sys.stdout.write('\r' + str(i + 1))
        sys.stdout.flush()
    X = vectorizer.fit_transform([dict(node.concept_vector) for node in nodes])
    # Concatenate explicitly: `ndarray + list` would broadcast-add the -1s
    # onto the labels (or raise on a shape mismatch) instead of appending.
    y = np.concatenate([
        np.arange(n_labeled, dtype=int),
        np.full(len(nodes) - n_labeled, -1, dtype=int),
    ])

    model = LabelSpreading()
    model.fit(X, y)
    model.vectorizer = vectorizer
    return model