def parse(self, path='shakespeare/sonnets.txt'):
    """Read the cleaned sonnet file and store its sentences as word sequences.

    Calls ``self.get_file(path)`` and ``self.format_file(path)``, then reads
    ``'<path>.clean'`` line by line. Lines are split into sentences on
    ``'. '``, ``'? '`` and ``'! '``; a sentence that is still open at the end
    of a line is carried over and prepended to the first sentence of the next
    line. For every completed sentence, each word is fetched or created as a
    ``Word``, and every word set returned by
    ``list_subsets(words, size=project.max_lookahead)`` is saved as a
    ``ShakespeareSequence`` with one ``SequenceOrder`` row per word
    (positions start at 1).

    Args:
        path: Base path of the sonnet file; the file actually opened is
            ``path`` with ``.clean`` appended.

    Note:
        The split pattern requires a space after the punctuation mark, so a
        terminal mark at the very end of a line stays attached to the last
        word of that sentence.
    """
    self.get_file(path)
    self.format_file(path)
    with open('{}.clean'.format(path), 'r') as sonnetfile:
        project = self.sequence_model.get_or_create_project()

        def store_sentence(sentence):
            # Persist one sentence: its words, then every word set with
            # 1-based positions.
            words = [Word.objects.get_or_create(name=word)[0] for word in sentence]
            for wordset in list_subsets(words, size=project.max_lookahead):
                sequence = ShakespeareSequence(title=self.title, project=project)
                sequence.save()
                for position, word in enumerate(wordset, start=1):
                    SequenceOrder(word=word, sequence=sequence, position=position).save()

        # Words of a sentence that has not been terminated yet.
        remainder = []
        for line in sonnetfile:
            line = line.strip()
            if line == '' or isroman(line):
                # Start fresh at a new sonnet (just in case a punctuation
                # mark was missing); keep the dangling fragment instead of
                # silently dropping it.
                if remainder:
                    store_sentence(remainder)
                remainder = []
                continue
            sentences = [sentence.split() for sentence in re.split(r'\. |\? |! ', line)]
            sentences[0] = remainder + sentences[0]
            if line.endswith(('.', '?', '!')):
                # The last sentence is complete: nothing to carry over.
                remainder = []
            else:
                # The last sentence continues on the next line.
                remainder = sentences.pop()
            for sentence in sentences:
                store_sentence(sentence)
        # The file may end without terminal punctuation.
        if remainder:
            store_sentence(remainder)
def get(self, request):
    """Render ``self.template_name`` with the Shakespeare project in context.

    The project is obtained from ``ShakespeareSequence.get_or_create_project()``
    and exposed to the template under the ``'project'`` key.
    """
    shakespeare_project = ShakespeareSequence.get_or_create_project()
    return render(request, self.template_name, {'project': shakespeare_project})