def parse_conflicts(self, debug=False): """ This method parses the Plotto text into a dictionary of conflicts, where keys represent indices which are the Plotto indices used in the text, and values are the Conflict Objects derived from the parser results. :param debug: Used for debugging :return: None """ sequels = [] with open("../texts/plotto_cleaned.txt", "r") as text: # skipping intro of book for i in range(CONFLICT_START_LINE): text.next() line_number = CONFLICT_START_LINE - 2 paragraph = "" index = "" conflict_theme_name = "" # Represents the 'states' of the FSA find_conflict_theme = True conflict_theme_found = False parse_conflict_subgroup = False parse_conflict_index = False parse_conflict_text = False near_end = False conflict_text = "" conflict_counter = 0 for line, line_number in zip(text, range(CONFLICT_START_LINE, CONFLICT_STOP_LINE)): if find_conflict_theme: if debug: "Trying to find theme" title = re.match('\([0-9]+\) ([a-zA-Z -"].+)', line) if title: conflict_theme_name = title.group(1) conflict_theme_found = True find_conflict_theme = False if debug: print "FOUND THEME : " + title.group(1) continue if conflict_theme_found: # time to parse the conflict if line == "\n": if debug: print "\nTheme name: ", line_number, conflict_theme_name conflict_theme_found = False parse_conflict_index = True else: conflict_theme_name += line if parse_conflict_index: conflict_index_match = re.match('[0-9]+', line) if conflict_index_match: conflict_counter += 1 if debug: print "\n--------\nIndex of conflict = ", line, "\n------------\n" index = line.strip() parse_conflict_subgroup = True parse_conflict_index = False try: current_conflict = int(index) if current_conflict != conflict_counter: print "Fix: ", conflict_counter, "at line: ", line_number return except: print "Failed at: ", index, "at line number: ", line_number if parse_conflict_subgroup: conflict_data = re.match('\((?P<sub_index>[a-z0-9]_*)\)* (?P<prequels>(?:.+))', line) # needs to also match indices with 
no sub-indices no_subindex = re.match('(?P<prequels>\((?:.+)\))', line) if conflict_data: if debug: print "---------------------\nWith data: ", line parse_conflict_subgroup = False sub_index = conflict_data.group('sub_index').strip() if debug: print "Sub index: ", sub_index if debug: print "Full list = ", conflict_data.group('prequels'), "\n---------------------" prequels = self.parse_prequels(conflict_data.group('prequels')) parse_conflict_text = True parse_conflict_subgroup = False continue if no_subindex: if debug: print "no_subindex" sub_index = "" prequels = self.parse_prequels(no_subindex.group('prequels')) parse_conflict_text = True parse_conflict_subgroup = False continue if parse_conflict_text: conflict_text += line.strip() # coming to end of sub conflict when sequels begin being listed check = re.findall('\((?:.+)\)', line) if len(check) > 0: for ind in check: sequels.append(ind) near_end = True if near_end and line == "\n": parse_conflict_text = False parse_conflict_subgroup = True parse_conflict_index = True near_end = False sequels2 = [] for sequel in sequels: removed_text = sequel conflict_text = conflict_text.replace(removed_text, "") sequels2.extend(self.parse_prequels(sequel)) conflict = Conflict(conflict_theme_name, index, sub_index, prequels, sequels2, conflict_text) if debug: print "\n\n", conflict.to_string(), "\n\n" self.conflicts[index + sub_index] = conflict conflict_text = "" sequels = [] find_conflict_theme = True continue
def conflict_decoder(obj):
    """
    Rebuild a Conflict from a mapping of its stored fields.

    The values under the keys 'theme', 'index', 'sub_index', 'prequels',
    'sequels' and 'conflict_text' are passed, in that order, as positional
    arguments to ``Conflict.from_file``.

    NOTE(review): presumably used as a decoding hook when loading saved
    conflicts -- confirm against the callers.

    :param obj: Mapping that contains the six keys listed above.
    :return: Whatever ``Conflict.from_file`` returns for those values.
    """
    field_order = ('theme', 'index', 'sub_index',
                   'prequels', 'sequels', 'conflict_text')
    arguments = [obj[field] for field in field_order]
    return Conflict.from_file(*arguments)