Ejemplo n.º 1
0
    def parse_conflicts(self, debug=False):
        """
        This method parses the Plotto text into a dictionary of conflicts,
        where keys represent indices which are the Plotto indices used in the text,
        and values are the Conflict Objects derived from the parser results.

        :param debug: Used for debugging
        :return: None
        """
        sequels = []
        with open("../texts/plotto_cleaned.txt", "r") as text:
            # skipping intro of book
            for i in range(CONFLICT_START_LINE):
                text.next()

            line_number = CONFLICT_START_LINE - 2
            paragraph = ""
            index = ""
            conflict_theme_name = ""
            # Represents the 'states' of the FSA
            find_conflict_theme = True
            conflict_theme_found = False
            parse_conflict_subgroup = False
            parse_conflict_index = False
            parse_conflict_text = False
            near_end = False

            conflict_text = ""
            conflict_counter = 0

            for line, line_number in zip(text, range(CONFLICT_START_LINE, CONFLICT_STOP_LINE)):

                if find_conflict_theme:
                    if debug: "Trying to find theme"
                    title = re.match('\([0-9]+\) ([a-zA-Z -"].+)', line)
                    if title:
                        conflict_theme_name = title.group(1)
                        conflict_theme_found = True
                        find_conflict_theme = False
                        if debug: print "FOUND THEME : " + title.group(1)
                        continue

                if conflict_theme_found:  # time to parse the conflict
                    if line == "\n":
                        if debug: print "\nTheme name: ", line_number, conflict_theme_name
                        conflict_theme_found = False
                        parse_conflict_index = True

                    else:
                        conflict_theme_name += line

                if parse_conflict_index:

                    conflict_index_match = re.match('[0-9]+', line)
                    if conflict_index_match:
                        conflict_counter += 1
                        if debug: print "\n--------\nIndex of conflict = ", line, "\n------------\n"
                        index = line.strip()
                        parse_conflict_subgroup = True
                        parse_conflict_index = False
                        try:
                            current_conflict = int(index)
                            if current_conflict != conflict_counter:
                                print "Fix: ", conflict_counter, "at line: ", line_number
                                return
                        except:
                            print "Failed at: ", index, "at line number: ", line_number

                if parse_conflict_subgroup:

                    conflict_data = re.match('\((?P<sub_index>[a-z0-9]_*)\)* (?P<prequels>(?:.+))', line)
                    # needs to also match indices with no sub-indices
                    no_subindex = re.match('(?P<prequels>\((?:.+)\))', line)

                    if conflict_data:
                        if debug: print "---------------------\nWith data: ", line
                        parse_conflict_subgroup = False
                        sub_index = conflict_data.group('sub_index').strip()
                        if debug: print "Sub index: ", sub_index

                        if debug: print "Full list = ", conflict_data.group('prequels'), "\n---------------------"
                        prequels = self.parse_prequels(conflict_data.group('prequels'))

                        parse_conflict_text = True
                        parse_conflict_subgroup = False
                        continue

                    if no_subindex:
                        if debug: print "no_subindex"
                        sub_index = ""
                        prequels = self.parse_prequels(no_subindex.group('prequels'))

                        parse_conflict_text = True
                        parse_conflict_subgroup = False
                        continue

                if parse_conflict_text:

                    conflict_text += line.strip()
                    # coming to end of sub conflict when sequels begin being listed
                    check = re.findall('\((?:.+)\)', line)

                    if len(check) > 0:
                        for ind in check:
                            sequels.append(ind)
                        near_end = True

                    if near_end and line == "\n":

                        parse_conflict_text = False
                        parse_conflict_subgroup = True
                        parse_conflict_index = True
                        near_end = False
                        sequels2 = []
                        for sequel in sequels:
                            removed_text = sequel
                            conflict_text = conflict_text.replace(removed_text, "")
                            sequels2.extend(self.parse_prequels(sequel))

                        conflict = Conflict(conflict_theme_name, index, sub_index, prequels, sequels2, conflict_text)
                        if debug: print "\n\n", conflict.to_string(), "\n\n"
                        self.conflicts[index + sub_index] = conflict
                        conflict_text = ""
                        sequels = []
                        find_conflict_theme = True
                        continue
Ejemplo n.º 2
0
def conflict_decoder(obj):
    """
    Rebuild a Conflict from its keyed representation.

    The values stored under 'theme', 'index', 'sub_index', 'prequels',
    'sequels' and 'conflict_text' are looked up in that order and passed
    positionally to ``Conflict.from_file``.

    :param obj: Object indexable by the six keys above (presumably a dict
                produced when decoding saved conflicts -- confirm with caller)
    :return: Whatever ``Conflict.from_file`` returns for those values
    """
    field_names = ('theme', 'index', 'sub_index',
                   'prequels', 'sequels', 'conflict_text')
    field_values = [obj[name] for name in field_names]
    return Conflict.from_file(*field_values)