예제 #1
0
def get_threshold(tok, cp_word, date_figures):
    """Return the numeric threshold attached to a comparison word, if any.

    The clause is parsed, the phrase surrounding ``cp_word`` is located and
    the first number that appears after ``cp_word`` in that phrase, and that
    is not a date, is taken as the threshold. When the token that follows
    this number in ``tok`` is a key of ``unit_m``, the number is multiplied
    by the associated value.

    Args:
        tok: tokens of the clause.
        cp_word: comparison word whose threshold is searched.
        date_figures: numbers of the clause that are dates and must be
            ignored.

    Returns:
        The threshold as a float, or None when no context or no suitable
        number is found.
    """
    parse = next(parser.parse(tok))  # First, we parse the whole clause

    # Then we search the grammatical context of cp_word. Most of the time it
    # is a Prepositional Phrase (PP), a Nominal Phrase (NP) or a Quantifier
    # Phrase (QP).

    # Preferred context: a PP that starts with cp_word. There is no early
    # exit on purpose: the last matching subtree is the one that is kept.
    context = None
    for subtree in parse.subtrees():
        if subtree.label() == "PP" and subtree.leaves()[0] == cp_word:
            context = subtree

    # Fallbacks, in order: any PP, then any NP, then any QP containing
    # cp_word (again, the last match of the first successful label wins).
    for label in ("PP", "NP", "QP"):
        if context is not None:
            break
        for candidate in get_subtrees(parse, label):
            if cp_word in candidate.leaves():
                context = candidate

    if context is None:
        return None

    # We look for the first number appearing after cp_word and not being a
    # date.
    leaves = context.leaves()
    cp_index = get_index(leaves, cp_word)  # position of cp_word in the context
    number = None
    for figure in get_nodes(context, "CD"):  # all numbers of the context
        if get_index(leaves, figure) > cp_index and figure not in date_figures:
            number = figure
            break

    if number is None:
        return None

    # If that number exists, we check whether a unit multiplier is written
    # just after it in the clause.
    position = get_index(tok, number)  # position of the number in the clause
    try:
        mult = unit_m[tok[position + 1].lower()]
    except (IndexError, KeyError):
        # The number is the last token, or the next token is not a unit.
        mult = 1
    return float(number) * mult
def find_time(ner, parse, parse_d):
    """Extract the time period and the comparison years of a sentence.

    Args:
        ner: sequence of (word, tag) pairs; the words tagged "DATE" are the
            candidate time expressions.
        parse: constituency parse of the sentence. ``parse.leaves()`` gives
            the tokens and its "PP" subtrees are used as contexts.
        parse_d: dependency structure, handed to ``find_links`` when a year
            belongs to no prepositional phrase.

    Returns:
        A tuple ``(date_from, date_to, date_than)``. ``date_from`` and
        ``date_to`` are the bounds of the period (None when unknown) and
        ``date_than`` is the list of years (ints) found after a "than".
    """

    def ints_in(words):
        # Keep only the words that can be read as integers.
        found = []
        for word in words:
            try:
                found.append(int(word))
            except (TypeError, ValueError):
                pass
        return found

    date_from = None
    date_to = None
    date_than = []

    # First, we look at the dates detected by the NER. The ones that are
    # years go in `years`, the other date words in `others`.
    years = []
    others = []
    for entry in ner:
        if entry[1] == "DATE":
            word = entry[0]
            if year_format(word):
                years.append(word)
            else:
                others.append(word)

    pps = get_subtrees(parse, "PP")
    tok = parse.leaves()

    # If there is a "than", the years placed after it go in date_than and
    # are removed from years. They are removed by their original token, so
    # the removal cannot miss a year whose text differs from str(int(year)).
    if 'than' in tok:
        idx = get_index(tok, 'than')
        after_than = [y for y in years if get_index(tok, y) > idx]
        date_than = [int(y) for y in after_than]
        for y in after_than:
            years.remove(y)

    # If only 1 year remains, we look at its context
    if len(years) == 1:
        y = years[0]
        link = None
        # The context of a year will often be the Prepositional Phrase (PP)
        # it belongs to (the last PP containing it is kept).
        for pp in pps:
            if y in pp.leaves():
                link = pp.leaves()
        # If no PP was found, we can also look at the words linked to the
        # year in the dependency structure.
        if link is None:
            link = find_links(parse_d, y)
        # Then, depending on the words found in the context of the year, we
        # know its function.
        if link != []:
            lower_link = lower_list(link)
            year = int(y)
            if 'in' in lower_link:
                date_from = year
                date_to = year
            # For example, if the year is associated with 'since' (like in
            # "Population in Iran since 1960"), it will be the DATE_FROM.
            elif 'since' in lower_link or 'after' in lower_link:
                date_from = year
            elif 'till' in lower_link or 'before' in lower_link:
                date_to = year
            else:
                date_from = year
                date_to = year

    # If we have 2 years or more, the minimum and maximum of all of them
    # give the time period.
    elif len(years) >= 2:
        year_values = [int(y) for y in years]
        date_from = min(year_values)
        date_to = max(year_values)

    # If we have no years but other things tagged as a date, we try to find
    # a duration (like in "over the last 8 decades").
    elif others:
        duration = 0
        lowered = lower_list(others)
        # We need to have the word "last" (whatever its case).
        if "last" in lowered:
            # Now we know that we probably have a structure "over the last
            # ...". We can try to find a figure (like the number of years).

            # First, we try to find numbers in the words tagged as dates
            # (they are not years here, years were filtered out above).
            fig = ints_in(others)

            # If no number was found, we try to find a PP containing all the
            # words of the date (the last such PP is kept) ...
            if not fig:
                pp_date = None
                for pp in pps:
                    leaves = lower_list(pp.leaves())
                    if all(word in leaves for word in lowered):
                        pp_date = leaves
                # ... and if such a PP was found, we look for numbers in it.
                if pp_date is not None and 'last' in pp_date:
                    fig = ints_in(pp_date)

            # The count defaults to 1 unless exactly one number was found.
            count = 1
            if len(fig) == 1:
                count = fig[0]

            # Now we try to find duration words (with the function)
            dur = get_duration(others)

            # If such a word was found, we take its duration and multiply it
            # by the number found: "8 decades" is 80, "5 centuries" is 500.
            if dur is not None:
                # With the default count of 1, it depends whether the word
                # is singular ('s') or plural ('p'); the plural default is 5:
                # over the last decades = 50, over the last 3 decades = 30,
                # over the last decade = 10.
                if count == 1:
                    if dur[1] == 's':
                        duration = dur[0]
                    elif dur[1] == 'p':
                        duration = dur[0] * 5
                else:
                    duration = dur[0] * count
                if duration > 0:
                    date_to = NOW
                    date_from = NOW - duration

    return (date_from, date_to, date_than)
def find_areas(sent):
    """Classify the areas mentioned in a sentence by their role.

    Args:
        sent: the raw sentence.

    Returns:
        A tuple ``(areas_in, areas_to, areas_than)``. Each element is a list
        of ``[official_name, type]`` entries for the areas classified
        respectively as "IN", "TO" and "THAN".
    """
    _, tok = replacement(sent)
    parse = next(parser.raw_parse(sent))
    areas = find_areas_in_list(tok)
    pps = get_subtrees(parse, "PP")
    buckets = {"IN": [], "TO": [], "THAN": []}

    # Position of the "than". Without any "than", the position is put after
    # all the words, so that no area can be located behind it.
    if 'than' in tok:
        than_idx = get_index(tok, "than")
    else:
        than_idx = len(tok) + 10

    for area in areas:
        name = area[0]  # name of the area, as written in the sentence
        area_type = area[1]  # country or region
        form = area[2]  # adjective 'a' or name 'n'

        if get_index(tok, name) > than_idx:
            role = "THAN"
        else:
            # Context of the area: the leaves of the last PP that contains
            # every word of its name.
            context = None
            for pp in pps:
                if all(part in pp.leaves() for part in name.split(" ")):
                    context = pp.leaves()

            if context is None:
                role = "IN"  # most of the time, the default category is "IN"
            elif 'to' in context or 'into' in context or 'towards' in context:
                role = "TO"  # words that indicate a category "TO"
            elif ('between' in context and 'and' in context
                  and first_word(name, 'and', tok) == 'and'):
                role = "TO"
            else:
                role = "IN"

        # Before storing the area, its name is converted to the official
        # writing, the same as in the dictionaries: "INDIA", "india" or
        # "INDia" all become "India". Until now the name was kept as written
        # in the sentence, to find it easily.
        # NOTE(review): when nothing matches, an empty list is stored for
        # this area - confirm that callers expect that.
        official = []
        wanted = name.lower()
        if form == 'a':
            for adjective in demo_list:
                if adjective.lower() == wanted:
                    official = [demo_dict[adjective], area_type]
        elif form == 'n':
            if area_type == 'country':
                known_names = country_list
            elif area_type == 'region':
                known_names = region_list
            else:
                known_names = []
            for known in known_names:
                if known.lower() == wanted:
                    official = [known, area_type]

        buckets[role].append(official)

    return (buckets["IN"], buckets["TO"], buckets["THAN"])
예제 #4
0
def find_aggregators(parse, parse_d, returned, agg_words):
    """Detect the comparisons and the aggregation expressed in a sentence.

    The sentence is searched for comparative words (threshold words such as
    "over"/"below", and comparatives followed by "than"), then cut in
    clauses holding one comparison each. Every clause is analysed on its own
    (areas, times, threshold). Superlative words are then searched to
    describe a possible aggregation.

    Args:
        parse: constituency parse of the sentence; ``parse.leaves()`` gives
            the tokens.
        parse_d: not referenced in the body of this function.
        returned: return type of the query. "Agr_Area" and "Agr_Time" are
            handled; "Value" is treated as "Agr_Area".
        agg_words: identifier words detected in the sentence, used to find
            the number of items of the aggregation.

    Returns:
        A tuple ``(comparisons, aggreg)``. ``comparisons`` holds one
        ``[comp_type, sens, V, V1, V2]`` list per clause. ``aggreg`` is
        ``[sens_sup, n_sup]`` when a superlative word was found, else None.

    Raises:
        Exception: with a message "Error N : ..." when the sentence cannot
            be interpreted (lone "than", clause mismatch, missing threshold,
            unexpected country/region, too many areas or times, ...).
    """

    tok = parse.leaves()
    ner = ner_tagger.tag(tok)
    pos = pos_tagger.tag(tok)
    dep = list(dep_parser.parse(tok))[0]

    # We store the numbers in the sentence that are dates, as it is useful when looking for a threshold
    figures = date_figures(ner, pos, dep)

    # When a comparison or aggregation is in the sentence, the user normally wants a list of something
    # But sometimes, there are no words specifying the type of the list and so the return is set as a value by default
    # Here, we temporarily set that return value to a list of countries
    # This will be useful if a comparison/aggregation is found
    # An example query for such a case would be "Highest GDPs in the world"
    if returned == "Value":
        returned = "Agr_Area"

    ## Comparative words

    # Some comparative words are "threshold-only" and do not require a construction with "than"
    th_words = ["over", "under", "below", "above"]
    th_inf = ["under", "below"]

    # We detect these words
    th_ = catch_words(tok, th_words)
    th = []

    # And just make sure that a threshold is linked to each one (as these words can appear in other contexts)
    for t in th_:
        if get_threshold(tok, t, figures) != None:
            th.append(t)

    # The other comparative words (that we will name comp words) require a structure with "than"
    # Some of them have to be specified (like "superior") but most of them are recognized easily
    # thanks to specific tags for comparison in the POS tags

    cp_words = ["superior", "inferior"]
    cp_inf = ["less", "lower", "inferior", "poorer"]

    comp_ = get_nodes(parse, "RBR") + get_nodes(parse, "JJR") + catch_words(
        tok, cp_words)
    comp = []

    # Then, we only keep the comparative words followed by a "than"
    # And we also reorder the words at the same time, adding the threshold words in the common list

    k = 0  # 1 while a comp word is waiting for its "than", 0 otherwise
    cp = ""  # current comp word
    for t in tok:
        if t in comp_:
            # NOTE(review): both branches below end with k == 1 and cp == t,
            # so the latest comp word always replaces the previous one.
            if k == 0:
                k = 1
                cp = t
            if k == 1:
                cp = t
        elif t in th:
            if k == 1:  # this case happens if a threshold word is found after a comp word but before a potential than
                # in that case, we cannot reasonably consider the comp word as it would create nested comparisons
                k = 0
                cp = ""
            comp.append(t)
        elif t == "than":
            if k == 0:
                raise Exception(
                    "Error 0 : than alone"
                )  # in case a "than" is found but without a comp word before
            elif k == 1:
                k = 0
                comp.append(cp)
                cp = ""

    ## Comparisons

    # Now that we have all the comparative words, we try to cut the sentence in clauses
    # Each clause must contain only one comparison (often there is just one clause)

    comparisons = []

    n_comp = len(comp)
    clauses, cuts = cut_in_clause(tok, comp, cut_words)

    if n_comp > 0:
        if len(clauses) == n_comp:
            # The i-th comparative word must belong to the i-th clause
            b = True
            for i in range(n_comp):
                if comp[i] not in clauses[i]:
                    b = False
            if not b:
                raise Exception("Error 1 : problem with clauses")

            # Else, everything is okay and we will now treat each clause separately
            else:
                for i in range(n_comp):
                    clause = clauses[i]
                    word = comp[i]

                    # We parse the clause. That way, we only consider the words of the clause and nothing else
                    # And of course, the result can differ from the parsing of the whole sentence

                    clause_sent = " ".join(clause)
                    clause_parse = next(parser.parse(clause))
                    clause_dep = list(dep_parser.parse(clause))[0]
                    clause_ner = ner_tagger.tag(clause)

                    # Then, we execute the functions find_areas and find_time for the clause
                    areas = find_areas(clause_sent)
                    times = find_time(clause_ner, clause_parse, clause_dep)

                    # find_time returns (date_from, date_to, date_than)
                    than_time = times[2]
                    to_time = times[1]
                    in_time = times[0]

                    # find_areas returns (areas_in, areas_to, areas_than); areas_to is not used here
                    than_area = areas[2]
                    in_area = areas[0]

                    # Here, we initialize the different variables that describe a comparison

                    comp_type = None  # what is the comparator (a threshold, another country/year, or something else)
                    sens = 'sup'  # is the comparison a "more than" ('sup') or a "less than" ('inf')
                    V1 = {
                    }  # elements of Value1 (the first value of the comparison, before "than")
                    V2 = {
                    }  # elements of Value2 (the second value of the comparison, after "than")
                    V = {
                    }  # some elements are not part of the comparison and belong to both values
                    # Example : "Countries with more population than Germany in 2010" -> we compare everything at the year 2010

                    # Now, we differentiate the treatment between "list of countries" and "list of years"

                    # Countries list
                    if returned == 'Agr_Area':

                        # If the comparative word is "threshold-only"
                        # NOTE(review): this membership test is case-sensitive while the
                        # th_inf test just below lower-cases the word - confirm intent.
                        if word in th_words:
                            if word.lower() in th_inf:
                                sens = "inf"

                            # Search of a threshold
                            threshold = get_threshold(clause, word, [])
                            if threshold == None:
                                raise Exception("Error 2 : No threshold found")
                            else:
                                comp_type = "Threshold"
                                V2["THRESHOLD"] = threshold

                            # Search of a time indicator (as we compare values, we cannot have a time series)
                            if ((in_time != None) and (in_time == to_time)):
                                V["TIME"] = in_time

                            # Search of a location indicator
                            # As the user wants a list of countries, he cannot specify a country in the query
                            # But he can give a region ("What countries in Asia ...")
                            region = True
                            r = []
                            for c in in_area:
                                if c[1] == 'country':
                                    region = False
                            if not region:
                                raise Exception(
                                    "Error 3 : Country was mentioned")
                            else:
                                for c in in_area:
                                    r.append(c[0])
                            V["AREA"] = r

                        # Else, the comparative word must belong to a "than" structure
                        else:
                            if 'than' in clause:

                                if word.lower() in cp_inf:
                                    sens = "inf"

                                idx = get_index(
                                    clause, "than"
                                )  # position of the "than", useful to fill V1 & V2

                                # First, we look at the locations
                                # Here, it is possible to mention a country if it is the comparator

                                if len(than_area) == 1:
                                    if than_area[0][1] == "country":
                                        V2["AREA"] = than_area[0][0]
                                        comp_type = "Country"
                                    else:
                                        raise Exception(
                                            "Error 4 : Comparison with a region"
                                        )
                                elif len(than_area) > 1:
                                    raise Exception(
                                        "Error 5 : Too many area mentioned")

                                # It is also possible to mention a region, as before
                                region = True
                                r = []
                                for c in in_area:
                                    if c[1] == 'country':
                                        region = False
                                if not region:
                                    raise Exception(
                                        "Error 3 : Country mentioned")
                                else:
                                    for c in in_area:
                                        r.append(c[0])
                                V["AREA"] = r

                                # Then, the time indicators

                                # If two dates are found on both sides of "than", the first one goes in V1 and the other in V2
                                has_than_time = False
                                if (len(than_time) == 1):
                                    if in_time != None:
                                        if (get_index(clause, str(in_time)) <
                                                idx):
                                            V1["TIME"] = in_time
                                            V2["TIME"] = than_time[0]
                                            has_than_time = True
                                            if comp_type == None:
                                                comp_type = "Two"

                                # Else, the year is general (goes in V)
                                if not has_than_time:
                                    if len(than_time) == 1:
                                        V["TIME"] = than_time[0]
                                    elif ((in_time != None)
                                          and (in_time == to_time)):
                                        V["TIME"] = in_time
                                    else:  # in case no date is given, either we raise an error or ask the user, or take a default one (to see later)
                                        #raise Exception("Error 6 : Must precise time period")
                                        pass

                                # If we haven't found yet the type of comparison, we try to find a threshold
                                # If there is none, the comparison is of type "Two" (two different values compared)

                                if comp_type == None:
                                    thres = get_threshold(
                                        clause, 'than', than_time)
                                    if thres != None:
                                        comp_type = "Threshold"
                                        V2["THRESHOLD"] = thres

                                if comp_type == None:
                                    comp_type = "Two"

                            else:
                                raise Exception(
                                    "Error 7 : comparison without 'than'")

                    # Years list
                    elif returned == 'Agr_Time':

                        # If threshold word
                        # NOTE(review): case-sensitive test, same remark as in the countries branch.
                        if word in th_words:
                            if word.lower() in th_inf:
                                sens = "inf"

                            threshold = get_threshold(clause, word, [])
                            if threshold == None:
                                raise Exception("Error 2 : No threshold found")
                            else:
                                comp_type = "Threshold"
                                V2["THRESHOLD"] = threshold

                            # As we have a list of years here, we can only have time indicators as a time period (more than one year)
                            if ((in_time != None) and (to_time != None)
                                    and (in_time != to_time)):
                                V["TIME"] = [in_time, to_time]
                            else:
                                V["TIME"] = None

                            # And conversely, the location indicators can only give one country (to be able to compare)
                            if (len(in_area) > 1
                                    or (len(in_area) == 1
                                        and in_area[0][1] == 'region')):
                                raise Exception(
                                    "Error 5 : Too many area mentioned")
                            else:
                                if len(in_area) == 1:
                                    V["AREA"] = in_area[0][0]
                                else:
                                    V["AREA"] = None

                        # If than construction
                        else:
                            if 'than' in clause:

                                if word.lower() in cp_inf:
                                    sens = "inf"

                                # NOTE(review): idx is assigned but not read in this branch.
                                idx = get_index(clause, "than")

                                # Get countries

                                # We accept if two countries are given on both sides of "than" : goes in V1 & V2
                                # Else it goes in V and can only be one country
                                if len(than_area) == 1:
                                    if than_area[0][1] == "country":
                                        if (len(in_area) == 1 and in_area[0][1]
                                                == "country"):
                                            V2["AREA"] = than_area[0][0]
                                            V1["AREA"] = in_area[0][0]
                                            comp_type = "Two"
                                        elif (len(in_area) == 0):
                                            V["AREA"] = than_area[0][0]
                                        else:
                                            raise Exception(
                                                "Error 5 : Too many area mentioned"
                                            )
                                    else:
                                        raise Exception(
                                            "Error 4 : Comparison with a region"
                                        )
                                elif len(than_area) > 1:
                                    raise Exception(
                                        "Error 5 : Too many area mentioned")
                                elif (len(than_area) == 0):
                                    if (len(in_area) > 1 or
                                        (len(in_area) == 1
                                         and in_area[0][1] == 'region')):
                                        raise Exception(
                                            "Error 5 : Too many area mentioned"
                                        )
                                    else:
                                        if len(in_area) == 1:
                                            V["AREA"] = in_area[0][0]
                                        else:
                                            V["AREA"] = None

                                # Get times

                                # A specific year can be given by the user as the comparator (comp_type -> "Time")
                                # This overrides a comp_type "Two" set by the countries above
                                if (len(than_time) == 1):
                                    V2["TIME"] = than_time[0]
                                    comp_type = "Time"
                                elif (len(than_time) > 1):
                                    raise Exception(
                                        "Error 8 : Too many times mentioned")

                                # Else, we accept only a time period
                                if ((in_time != None) and (to_time != None)
                                        and (in_time != to_time)):
                                    V["TIME"] = [in_time, to_time]
                                else:
                                    V["TIME"] = None

                                # If nothing, we do as before and look for a threshold

                                if comp_type == None:
                                    thres = get_threshold(
                                        clause, 'than', than_time)
                                    if thres != None:
                                        comp_type = "Threshold"
                                        V2["THRESHOLD"] = thres

                                if comp_type == None:
                                    comp_type = "Two"

                            else:
                                raise Exception(
                                    "Error 7 : comparison without 'than'")

                    # At the end, we gather everything for that clause and add this to the comparisons list
                    # (for any other value of `returned`, the entry keeps comp_type None and empty dicts)
                    comparisons.append([comp_type, sens, V, V1, V2])

        else:
            raise Exception("Error 9 : number of words and clauses")

    ## Superlative words

    # Aggregation words (or superlative words) are mostly found with their specific tag
    # Nonetheless, some have to be specified

    sp_words = ["top", "minimum", "maximum"]

    sup = get_nodes(parse, "RBS") + get_nodes(parse, "JJS") + catch_words(
        tok, sp_words)

    ## Aggregations

    aggreg = None
    sens_sup = None  # sense of the aggregation ('sup' for max or 'inf' for min)
    n_sup = 1  # number of items to display

    sup_neg = ["least", "lowest", "worst", "minimum"]
    # we also need to know the plural form of the words that could be linked to the aggregation
    agg_plural = ["areas", "countries", "places", "states", "nations", "years"]

    # Sense of the aggregation: 'inf' as soon as one negative superlative is present
    if (sup != []):
        for s in sup:
            if s.lower() in sup_neg:
                sens_sup = 'inf'
        if sens_sup == None:
            sens_sup = 'sup'

    # For the number of items, we look at the context of the superlative words + the words linked to them
    # These words usually form a context as a Nominal Phrase (NP)
    # And in the context, we look for numerical values (the last integer found is the one kept)
    sup_ = sup + agg_words
    nps = get_subtrees(parse, "NP")
    for s in sup_:
        for np in nps:
            if s in np.leaves():
                for a in np.leaves():
                    # NOTE(review): the bare except hides any error, not only
                    # the failed conversion of a non-numeric leaf.
                    try:
                        n_sup = int(a)
                    except:
                        pass

    # If no number was found, we look at a potential plural form
    # That would correspond to a default value of 10 items
    if n_sup == 1:
        for w in agg_words:
            if w.lower() in agg_plural:
                n_sup = 10

    if (sup != []):
        aggreg = [sens_sup, n_sup]

    # Finally, we return all the information found
    # 1) The list of comparisons (one for each clause)
    # 2) The sense and value of the aggregation (if any)
    return (comparisons, aggreg)
예제 #5
0
def _first_identifier_in_tokens(parse, parse_d):
    """Classify a sentence from the first area/time identifier among its tokens.

    Only the first matching token is considered; a token found in
    ``area_identifier`` takes precedence over ``time_identifier``.

    Returns a ``(returned, count, agg_words)`` triple. ``count`` is True when
    the word "number" is among the words linked to the identifier (as in
    "number of countries"), and False otherwise (including when no identifier
    is found at all).
    """
    for token in parse.leaves():
        lowered = token.lower()
        if lowered in area_identifier:
            returned = "Agr_Area"
        elif lowered in time_identifier:
            returned = "Agr_Time"
        else:
            continue
        links = lower_list(find_links(parse_d, lowered))
        return returned, "number" in links, [token]
    # No identifier found: the default return type is a plain value.
    return "Value", False, []


def _identifiers_linked_to(parse_d, wh_word):
    """Classify a "what"/"which" question from the words linked to the WH-word.

    Every linked word that is an area/time identifier is collected; when
    several are found, the last one decides the return type.

    Returns a ``(returned, count, agg_words)`` triple. ``count`` is False as
    soon as one identifier is found and None otherwise.
    """
    returned, count, agg_words = "Value", None, []
    for linked in find_links(parse_d, wh_word):
        lowered = linked.lower()
        if lowered in area_identifier:
            returned = "Agr_Area"
        elif lowered in time_identifier:
            returned = "Agr_Time"
        else:
            continue
        agg_words.append(linked)
        count = False
    return returned, count, agg_words


def _how_question_target(parse):
    """Classify a "how" question ("how many countries", "how much time", ...).

    A count is only detected when the sentence holds exactly one WHADJP whose
    adjectives include "many" or "much", and the first WHNP of the sentence
    contains an area/time identifier. Anything else ("how big", "how rich",
    several WHADJP, no WHNP ...) falls back to a plain value.

    Returns a ``(returned, count, agg_words)`` triple. ``count`` is True when
    an identifier is found and None otherwise.
    """
    returned, count, agg_words = "Value", None, []

    adj_phrases = get_subtrees(parse, "WHADJP")
    if len(adj_phrases) != 1:
        # Zero or several WHADJP: too ambiguous, keep the default.
        return returned, count, agg_words

    adjectives = get_nodes(adj_phrases[0], "JJ")
    if "many" not in adjectives and "much" not in adjectives:
        return returned, count, agg_words

    wh_noun_phrases = get_subtrees(parse, "WHNP")
    if not wh_noun_phrases:
        # "how many/much" is not part of a WHNP: nothing to inspect.
        return returned, count, agg_words

    for word in wh_noun_phrases[0].leaves():
        lowered = word.lower()
        if lowered in area_identifier:
            returned = "Agr_Area"
        elif lowered in time_identifier:
            returned = "Agr_Time"
        else:
            continue
        agg_words.append(word)
        count = True
    return returned, count, agg_words


def type_of_sentence(parse, parse_d):
    """Determine the kind of sentence and what kind of answer it asks for.

    Args:
        parse: constituency parse of the sentence; it must support
            ``subtrees()``, ``leaves()``, ``label()`` and indexing
            (presumably an ``nltk.Tree`` -- confirm against the caller).
        parse_d: structure handed to ``find_links`` to retrieve the words
            linked to a given word (presumably a dependency parse -- confirm).

    Returns:
        A tuple ``(type, returned, count, agg_words)`` where

        * ``type`` is "NP" (the default) or "WH";
        * ``returned`` is "Value", "Agr_Area" or "Agr_Time";
        * ``count`` tells whether a count is asked: always True/False for NP
          and "other" sentences, possibly None for WH and yes/no questions
          when nothing could be determined;
        * ``agg_words`` lists the area/time identifiers detected.
    """
    sentence_type = "NP"  #type of sentence (NP or WH)
    returned = "Value"  #return type (Value, Agr_Area, Agr_Time)
    count = None  #is a count asked (True, False)
    agg_words = []  #store the identifiers detected in the sentence

    # Determines if the sentence contains a WH-structure
    labels = [subtree.label() for subtree in parse.subtrees()]
    wh = is_wh(labels)

    # The NP case is tested first (it is the default) because a WH word can
    # appear inside a purely nominal sentence
    # ("Number of countries in |which| GDP is above ...").

    # NP Sentence
    if parse[0].label() == "NP":
        returned, count, agg_words = _first_identifier_in_tokens(
            parse, parse_d)

    # Wh-Questions
    elif wh:
        sentence_type = "WH"

        # Capture all the possible WH-words of the sentence. When there are
        # several, only the first collected one is considered
        # (example: "What are the countries where ..." -> "What").
        wh_words = (get_nodes(parse, "WRB") + get_nodes(parse, "WP") +
                    get_nodes(parse, "WDT"))
        first_wh = wh_words[0].lower() if wh_words else None

        if first_wh == "how":
            returned, count, agg_words = _how_question_target(parse)
        elif first_wh in ("what", "which"):
            # A specific word may be linked to the WH-word
            # ("Which countries ...", "What are the places ...") or not
            # ("What is the GDP of ...").
            returned, count, agg_words = _identifiers_linked_to(
                parse_d, first_wh)
        # For the other WH-words the meaning is carried by the word itself.
        elif first_wh == "when":
            returned, count = "Agr_Time", False
        elif first_wh in ("where", "who"):
            returned, count = "Agr_Area", False

    # Yes/No Questions (not treated at the moment)
    elif get_subtrees(parse, "SQ"):
        pass

    # Other sentences (order, verbal phrase...): we still check if there is
    # an area/time identifier (and a count)
    else:
        returned, count, agg_words = _first_identifier_in_tokens(
            parse, parse_d)

    return (sentence_type, returned, count, agg_words)