def train_model(nodes, datasets):
    """Fit a semi-supervised LabelSpreading model over seed and dataset nodes.

    Every node in ``nodes`` is treated as labeled and receives its own class
    id equal to its position (0, 1, 2, ...). Every object collected from
    ``compute_dataset(dataset).classes`` is appended after the seeds and
    marked ``-1``, which scikit-learn's LabelSpreading treats as "unlabeled".

    Args:
        nodes: Iterable of seed objects exposing a ``concept_vector``
            attribute that ``dict()`` accepts. May be a generator; it is
            consumed once.
        datasets: Iterable of inputs passed one at a time to
            ``compute_dataset``; the ``classes`` attribute of each result
            must be an iterable of objects that also expose
            ``concept_vector``.

    Returns:
        The fitted ``LabelSpreading`` instance, with the fitted
        ``DictVectorizer`` attached as ``model.vectorizer`` so callers can
        vectorize new nodes the same way.

    Side effects:
        Writes a running dataset counter to stdout, rewriting the same line
        with a carriage return.
    """
    # NOTE(review): the visible source contains a second, later definition of
    # train_model that shadows this one at import time -- confirm which one
    # is meant to survive.

    # Materialise before measuring so generators / unsized iterables work.
    nodes = list(nodes)
    n_labeled = len(nodes)

    for count, dataset in enumerate(datasets, start=1):
        nodes.extend(compute_dataset(dataset).classes)
        # Progress counter rewritten in place.
        sys.stdout.write('\r')
        sys.stdout.write(str(count))
        sys.stdout.flush()

    vectorizer = DictVectorizer(sparse=True)
    X = vectorizer.fit_transform([dict(node.concept_vector) for node in nodes])

    # Concatenate explicitly: `ndarray + list` is an element-wise broadcast
    # add, not an append, so the original expression either raised or
    # produced a label vector of the wrong length/content.
    y = np.concatenate([
        np.arange(n_labeled),
        np.full(len(nodes) - n_labeled, -1, dtype=int),
    ])

    model = LabelSpreading()
    model.fit(X, y)
    model.vectorizer = vectorizer
    return model
def train_model(nodes, datasets):
    """Fit a semi-supervised LabelSpreading model over seed and dataset nodes.

    Each element of ``nodes`` is a labeled seed whose class id is its index
    in the sequence. Objects gathered from ``compute_dataset(dataset).classes``
    for every dataset are appended after the seeds with label ``-1``, the
    marker scikit-learn's LabelSpreading uses for unlabeled samples.

    Args:
        nodes: Iterable of seed objects with a ``concept_vector`` attribute
            that can be passed to ``dict()``. Generators are accepted and
            consumed once.
        datasets: Iterable of inputs handed individually to
            ``compute_dataset``; each result must expose an iterable
            ``classes`` attribute whose items also have ``concept_vector``.

    Returns:
        The fitted ``LabelSpreading`` model. The fitted ``DictVectorizer`` is
        stored on it as ``model.vectorizer``.

    Side effects:
        Prints the number of datasets processed so far to stdout, overwriting
        the same line via a carriage return.
    """
    # NOTE(review): an earlier definition of train_model with the same body
    # appears in the visible source; this later one is the one bound at
    # import time -- confirm the duplicate is unintended.

    # Convert first: calling len() on the raw argument fails for generators.
    nodes = list(nodes)
    seed_count = len(nodes)

    for processed, dataset in enumerate(datasets, start=1):
        graph = compute_dataset(dataset)
        nodes.extend(graph.classes)
        # Progress counter rewritten in place.
        sys.stdout.write('\r')
        sys.stdout.write(str(processed))
        sys.stdout.flush()

    vectorizer = DictVectorizer(sparse=True)
    X = vectorizer.fit_transform([dict(node.concept_vector) for node in nodes])

    # `ndarray + list` broadcasts element-wise instead of appending, so the
    # labels must be joined with an explicit concatenate.
    seed_labels = np.arange(seed_count)
    unlabeled_marks = np.full(len(nodes) - seed_count, -1, dtype=int)
    y = np.concatenate([seed_labels, unlabeled_marks])

    model = LabelSpreading()
    model.fit(X, y)
    model.vectorizer = vectorizer
    return model