Example #1
0
    def parse(self, path='shakespeare/sonnets.txt'):
        """Read the cleaned sonnets file and store its sentences as word sequences.

        Calls ``self.get_file(path)`` and ``self.format_file(path)`` and then
        reads ``'<path>.clean'`` line by line (presumably those helpers fetch
        the text and write the cleaned copy -- confirm against their
        definitions).

        A sentence may span several lines of verse, so the unfinished tail of
        each line is carried over in ``remainder`` and prepended to the first
        sentence of the following line. A sentence is only stored once a line
        closes it with ``.``, ``?`` or ``!``. Blank lines and lines for which
        ``isroman`` is true mark a new sonnet and discard any carried-over
        fragment; a fragment still pending at end of file is likewise dropped.

        For every completed sentence, each word is looked up (or created) as a
        ``Word`` and every word set yielded by ``list_subsets`` is saved as a
        ``ShakespeareSequence`` with one ``SequenceOrder`` row per word,
        positions starting at 1.

        :param path: location of the sonnets text; the file actually opened is
            ``path`` with ``.clean`` appended.
        """
        self.get_file(path)
        self.format_file(path)

        # Sentence boundaries inside a line: terminal punctuation followed by a
        # space. Raw string avoids invalid-escape warnings; compiled once here
        # instead of being re-parsed for every line.
        sentence_break = re.compile(r'\. |\? |! ')

        with open('{}.clean'.format(path), 'r') as sonnetfile:
            remainder = []
            project = self.sequence_model.get_or_create_project()

            for line in sonnetfile:
                line = line.strip()
                if line == '' or isroman(line):
                    # start new at a new sonnet (just in case a punctuation mark was missing)
                    remainder = []
                    continue

                sentences = [part.split() for part in sentence_break.split(line)]
                sentences[0] = remainder + sentences[0]

                if line.endswith(('.', '?', '!')):
                    # The line closes its last sentence: nothing to carry over.
                    remainder = []
                else:
                    # The last fragment is unfinished; hold it back and join it
                    # with the start of the next line instead of storing it now.
                    remainder = sentences.pop()

                for sentence in sentences:
                    words = [Word.objects.get_or_create(name=word)[0] for word in sentence]

                    for wordset in list_subsets(words, size=project.max_lookahead):
                        sequence = ShakespeareSequence(title=self.title, project=project)
                        sequence.save()

                        # Positions are 1-based.
                        for position, word in enumerate(wordset, start=1):
                            SequenceOrder(word=word, sequence=sequence, position=position).save()
Example #2
0
 def get(self, request):
     """Render ``self.template_name`` with the Shakespeare project as ``project``."""
     project = ShakespeareSequence.get_or_create_project()
     return render(request, self.template_name, {'project': project})