def handle(self, *args, **options): path_to_texts = os.path.join(BASE_DIR, 'input_texts') comp = TreeComparer() # Iterate over all the input texts, break each up into sentences and gather all into one # list of sentences. sr = SentenceReader() sentences = [] for file in os.listdir(path_to_texts): path = os.path.join(path_to_texts, file) sentences.extend([sentence for sentence in sr.get_sentences(path)]) # For each of the sentences, create a list of parsed objects. These are just tuples with, # the sentence, the list of chunks and the parse tree as a string print("*** Parsing sentences: ") parser = Parser() parsed_objects = [ parser.parse(sentence) for sentence in tqdm(sentences) ] # Iterate over the list of parsed objects. You need to create a Question for each chunk, with the # sentence and the parse tree for the chunk. # Make a list of Questions print("*** Creating Questions: ") translator = Translator() for par_obj in tqdm(parsed_objects): # Here we iterate over the chunks for each sentence and create a Question for each. whole_sentence = par_obj[0] sentence_tree_string = par_obj[2] sentence_object = Sentence.objects.get_or_create( sentence=whole_sentence, sentence_tree_string=sentence_tree_string)[0] for chunk in par_obj[1]: # check if suitable chunk_length = len(chunk.split(' ')) if 4 <= chunk_length <= 8: chunk_tree = parser.parse(chunk)[2] chunk_translation = translator.get_translation(chunk) question = Question.objects.get_or_create( sentence=sentence_object, chunk=chunk, chunk_translation=chunk_translation, chunk_tree_string=chunk_tree)[0] question.question_tree_string = comp.remove_chunk_from_parse_tree( question) question.save()
def get_features_pos(pk): parser = Parser() # get the card and tree string card = Card.objects.get(pk=pk) s = card.sentence.sentence tree = parser.parse(s)[2] # get a list of the tags in the parse tree x = re.findall(r'[A-HJ-Z]+', tree) # create a dict for this sentence dict = {} for tag in tags: dict[tag] = x.count(tag) # return an array of values for the dict return [val for val in dict.values()]