Exemple #1
0
    def closest_to(klass, term):
        """Return the objects whose abbreviations are closest to ``term``.

        Every object from ``klass.objects.all()`` is scored with ``dm``
        (an edit distance, going by the original comments) between the
        lower-cased term and its lower-cased ``abbr_en`` / ``abbr_fr``.
        The objects behind the ten lowest scores are returned as a set, so
        fewer than ten objects come back when one object ranks with both
        of its abbreviations.

        This is a best-effort lookup: any exception is printed and ``None``
        is returned instead of a set.
        """
        try:
            big_term = term.lower()
            vax_tuples = []
            for obj in klass.objects.all():
                big_en = ""
                if obj.abbr_en is not None:
                    big_en = obj.abbr_en.lower()
                    # distance between the term and the english abbreviation
                    vax_tuples.append((dm(big_term, big_en), obj, obj.abbr_en))
                if obj.abbr_fr is not None:
                    big_fr = obj.abbr_fr.lower()
                    # score the french abbreviation only when it differs from
                    # the english one, to avoid a duplicate entry
                    if big_fr != big_en:
                        vax_tuples.append((dm(big_term, big_fr), obj, obj.abbr_fr))

            # sort tuples by ascending distance, pluck the 10 closest matches
            closest = sorted(vax_tuples, key=itemgetter(0))[:10]
            # return only the objects
            return set(map(itemgetter(1), closest))
        except Exception as e:
            # deliberate best-effort: report the failure and hand back None
            print('BANG')
            print(e)
            return None
Exemple #2
0
    def country_aware_closest_to(klass, term, country_pk):
        """Find objects matching ``term``, split by what a country stocks.

        Matching runs in two stages:

        1. Objects whose ``_field_list()`` shares at least one word with
           ``_split_term(term)`` are collected, together with every member
           of ``vaccine_set`` on each of their ``group`` values.
        2. If stage 1 finds nothing, the objects behind the ten lowest
           ``dm`` scores (an edit distance, going by the original comments)
           against ``abbr_en`` / ``abbr_fr`` are used instead.

        Returns a 2-tuple:

        * ``(list_of_up_to_20_matches, None)`` when none of the matches is a
          ``vaccine`` of a ``CountryStock`` filtered by ``country_pk``;
        * ``(matches_not_stocked, matches_stocked)`` as two sets otherwise.

        This is a best-effort lookup: any exception is printed and a bare
        ``None`` (not a tuple) is returned.
        """
        try:
            big_term = term.lower()
            # grrr edge cases
            big_term = big_term.replace('meningitis', 'mening')
            big_term = big_term.replace('measles', 'mea')
            countrystocks = CountryStock.objects.filter(country=country_pk)
            # all vaccines this country has stocks of; built once as a set
            # so it can be reused for every set operation below
            stocked = set(map(attrgetter('vaccine'), countrystocks))

            # get list of vaccines whose name and/or any abbreviations
            # contain any words in the given term
            term_words = set(_split_term(big_term))
            vax_partials = [obj for obj in klass.objects.all()
                            if not term_words.isdisjoint(obj._field_list())]
            if vax_partials:
                # widen the match to every vaccine sharing a group
                groups = set(map(attrgetter('group'), vax_partials))
                relatives = itertools.chain.from_iterable(
                    group.vaccine_set.all() for group in groups)
                only_objs = set(vax_partials).union(relatives)
            # if there are no overlapping terms, try to find
            # similar vaccines by edit distance
            else:
                vax_tuples = []
                for obj in klass.objects.all():
                    big_en = ""
                    if obj.abbr_en is not None:
                        big_en = obj.abbr_en.lower()
                        # distance between the term and the english abbreviation
                        vax_tuples.append((dm(big_term, big_en), obj, obj.abbr_en))
                    if obj.abbr_fr is not None:
                        big_fr = obj.abbr_fr.lower()
                        # score the french abbreviation only when it differs
                        if big_fr != big_en:
                            vax_tuples.append((dm(big_term, big_fr), obj, obj.abbr_fr))

                # sort tuples by ascending distance, pluck the 10 closest matches
                closest = sorted(vax_tuples, key=itemgetter(0))[:10]
                # keep only the objects
                only_objs = set(map(itemgetter(1), closest))

            # see if any closest matches are vaccines this country stocks
            matches_in_stock = only_objs.intersection(stocked)
            if not matches_in_stock:
                # if not, return top matches and None
                return list(only_objs)[:20], None
            # if so, return the ones the country doesn't stock and the ones it does
            return only_objs.difference(stocked), matches_in_stock

        except Exception as e:
            # deliberate best-effort: report the failure and hand back None
            print('BANG country aware closest to')
            print(e)
            return None
Exemple #3
0
    def closest_to(klass, term):
        """Return up to five objects whose names are closest to ``term``.

        The term is first run through a table of hard-coded aliases, then
        reduced to its longest word that is not in the ``exclude`` set.
        Each object from ``klass.objects.all()`` is reduced the same way
        for ``name`` and ``name_fr``, and scored with ``dm`` (an edit
        distance, going by the original comments) against the term's word.
        The objects behind the five lowest scores are returned as a set.

        A stored name consisting only of excluded words is skipped rather
        than aborting the whole search.

        This is a best-effort lookup: any exception (including a term that
        has no usable word left after exclusion) is printed and ``None`` is
        returned instead of a set.
        """
        try:
            # grr edge cases... applied in order, each test seeing the result
            # of the previous replacement
            aliases = (
                ('OPT', 'palestinian'),
                ('PALESTINE', 'palestinian'),
                ('GAZA', 'palestinian'),
                ("SERBIA", 'serbia and montenegro'),
                ("LIBYA", 'libyan arab JAMAHIRIYA'),
                ("DRC", 'congo'),
                ("IVORY", "COTE D'IVOIRE"),
                ("VERT", "cape verde"),
                ("PACIFIC", "fiji"),
            )
            for needle, replacement in aliases:
                if needle in term.upper():
                    term = replacement

            # words to exclude when attempting to match country names
            # NOTE(review): hyphenated entries cannot match, because hyphens
            # are replaced by spaces before words are compared
            exclude = set([
                "democratic", "peoples", "republic", "the", "of", "american",
                "french", "brazzaville", "islamic", "people's", "territory",
                "kingdom", "démocratique", "république", "territories",
                "françaises", "française", "islands", "british",
                "britannique", "américaines", "britanniques", "western",
                "occidental", "république-unie", "l'ex-république",
                "equatorial", "équatoriale", "territoire", "plurinational",
                "conakry", "states", "états", "outlying", "éloignées",
                "federation", "fédération", "pays", "sultanate",
            ])

            def longest_word(name):
                # longest non-excluded word of a stored name, or None when
                # every word is excluded (max() would raise on an empty set)
                words = set(word.lower().strip(",")
                            for word in name.replace('-', ' ').split())
                candidates = words.difference(exclude)
                if not candidates:
                    return None
                return max(candidates, key=len)

            # replace hyphens, exclude any words from exclude set, and pluck
            # the longest remaining word; raises when nothing remains, which
            # is reported by the handler below
            big_term = max(set(term.lower().replace('-', ' ').split()).difference(exclude), key=len)

            country_tuples = []
            for obj in klass.objects.all():
                big_en = ""
                if obj.name is not None:
                    word_en = longest_word(obj.name)
                    if word_en is not None:
                        big_en = word_en
                        # distance between the term's word and the english word
                        country_tuples.append((dm(big_term, big_en), obj, obj.name))
                if obj.name_fr is not None:
                    big_fr = longest_word(obj.name_fr)
                    # score the french word only when it differs from the
                    # english one, to avoid a duplicate entry
                    if big_fr is not None and big_fr != big_en:
                        country_tuples.append((dm(big_term, big_fr), obj, obj.name_fr))

            # sort tuples by ascending distance, pluck the 5 closest matches
            closest = sorted(country_tuples, key=itemgetter(0))[:5]
            # return only the objects
            return set(map(itemgetter(1), closest))
        except Exception as e:
            # deliberate best-effort: report the failure and hand back None
            print('BANG')
            print(e)
            return None