def train_classifier():
    """Train the system classifier on the fingerprints of attributed chunks.

    Marks the first ``Classifier`` row as ``"training"``, collects an author
    name and a fingerprint list from every chunk that has an author, and
    passes both lists to ``classifier.svm.train_svm``. A truthy result is
    handed to ``store_trained_classifier.delay``; a falsy one resets the
    status to ``"untrained"``.

    Returns:
        True when training produced a truthy classifier and it was handed
        off for storage, False when training returned a falsy result.

    Raises:
        Any exception raised while fetching chunks or training. The status
        is reset to ``"untrained"`` before the exception propagates.
    """
    print("Training the classifier")
    system_classifier = Classifier.objects.first()
    system_classifier.status = "training"
    system_classifier.save()

    try:
        print("Fetching Chunks")
        authors = []
        fingerprints = []
        for chunk in Chunk.get_chunks():
            # Chunks without an author carry no label; skipping both appends
            # keeps the two lists index-aligned for training.
            if chunk.author:
                authors.append(chunk.author.name)
                fingerprints.append(chunk.get_fingerprint_list())

        print("Training...")
        clf = classifier.svm.train_svm(fingerprints, authors)
    except Exception:
        # The "training" status was already saved above; without this reset a
        # crash here would leave the row reporting "training" indefinitely.
        system_classifier.status = "untrained"
        system_classifier.save()
        raise

    if clf:
        print("Trained the classifier")
        # NOTE(review): this function never writes a "trained" status itself;
        # presumably the queued store task does - confirm.
        store_trained_classifier.delay(clf)
        return True

    print("Failed to train classifier")
    system_classifier.status = "untrained"
    system_classifier.save()
    return False
def create_text_average_chunk(text_id):
    """Rebuild the average chunk stored on the text with primary key *text_id*.

    Deletes the text's current ``average_chunk`` (if any), averages the
    fingerprints of the chunks filtered to that text, copies every averaged
    value onto a freshly created ``Chunk`` and links that chunk to the text
    in both directions.

    Args:
        text_id: primary key passed to ``Text.objects.get``.

    Returns:
        Always True.
    """
    text = Text.objects.get(pk=text_id)

    # The previous average is removed before the chunks are queried. It was
    # saved with ``text`` set to this text, so it would presumably match the
    # filter below otherwise - confirm against ``Chunk.get_chunks``.
    previous_average = text.average_chunk
    if previous_average is not None:
        previous_average.delete()

    text_chunks = Chunk.get_chunks().filter(text=text)
    chunk_count = len(text_chunks)
    print("averaging %i chunks" % chunk_count)
    averaged = Chunk.get_average_fingerprint_of_chunks(text_chunks)

    # The new row is created only after averaging, so it is not part of the
    # set that was just averaged.
    average_chunk = Chunk.objects.create()
    for field_name, field_value in averaged.items():
        setattr(average_chunk, field_name, field_value)
    average_chunk.text = text
    average_chunk.save()

    text.average_chunk = average_chunk
    text.save()
    return True