def disjunctify(q):
    """Rewrite query ``q`` so that its terms are all OR'ed together.

    The query is split with ``tu.tokenize``; the literal tokens ``OR`` and
    ``AND`` are dropped, every remaining token is wrapped in parentheses,
    and the wrapped tokens are joined with ``" OR "``.

    Example carried over from the original comment:

        'from="david carasso" to=amrit OR to=deep delay>4 AND delay<10 NOT subject=*erik* (rotsky OR rosensteel)'
        =====>
        '(from="david carasso") OR (to=amrit) OR (to=deep) OR (delay>4) OR (delay<10) OR (NOT subject=*erik*) OR ((rotsky OR rosensteel))'
    """
    # NOTE(review): a function with this name appears more than once in the
    # visible source; confirm which definition is meant to be kept.
    wrapped = []
    for token in tu.tokenize(q):
        # Boolean connectives are discarded; the output only ever uses OR.
        if token in ('OR', 'AND'):
            continue
        wrapped.append("(%s)" % token)
    return " OR ".join(wrapped)
def getMostRestrictiveTerm(q, **kwargs):
    """Return the token of ``q`` with the lowest ``termPopularity`` count.

    ``q`` is split with ``tu.tokenize``. If that yields fewer than two
    tokens, ``q`` itself is returned unchanged. Otherwise each token other
    than the literal ``OR`` / ``AND`` is scored with
    ``termPopularity(term, **kwargs)`` and the lowest-scoring token seen so
    far is remembered. The scan stops early as soon as a new lowest count is
    at or below ``RARENESS_IS_GOOD_ENOUGH``.

    Returns ``None`` when no token scores below the initial ceiling of
    99999999999 (for instance when every token is ``OR`` or ``AND``).
    """
    # NOTE(review): a function with this name appears more than once in the
    # visible source; confirm which definition is meant to be kept.
    tokens = tu.tokenize(q)
    if len(tokens) < 2:
        return q

    rarest_term = None
    lowest_count = 99999999999
    for term in tokens:
        # Boolean connectives are not searchable terms; skip them.
        if term in ('OR', 'AND'):
            continue
        count = termPopularity(term, **kwargs)
        log("term: %s count: %s" % (term, count))
        if count < lowest_count:
            lowest_count, rarest_term = count, term
            # Good enough: avoid scoring the remaining tokens.
            if count <= RARENESS_IS_GOOD_ENOUGH:
                log("term %s is rare enough (%s) to use and break early." % (term, count))
                break
    return rarest_term
def getMostRestrictiveTerm(q, **kwargs):
    """Pick the least popular term in query ``q``.

    Tokens come from ``tu.tokenize(q)``. A query with fewer than two tokens
    is returned as-is. For longer queries, every token except the literal
    ``OR`` and ``AND`` is passed to ``termPopularity`` (together with
    ``kwargs``), and the token with the smallest count wins. Once a new
    smallest count is no greater than ``RARENESS_IS_GOOD_ENOUGH`` the search
    ends immediately with that token.

    The result is ``None`` if no token produced a count below 99999999999.
    """
    # NOTE(review): a function with this name appears more than once in the
    # visible source; confirm which definition is meant to be kept.
    tokens = tu.tokenize(q)
    if len(tokens) < 2:
        return q

    winner = None
    winner_count = 99999999999
    candidates = (tok for tok in tokens if tok not in ('OR', 'AND'))
    for candidate in candidates:
        popularity = termPopularity(candidate, **kwargs)
        log("term: %s count: %s" % (candidate, popularity))
        if popularity < winner_count:
            winner_count = popularity
            winner = candidate
            # Stop scoring further tokens once this one is rare enough.
            if popularity <= RARENESS_IS_GOOD_ENOUGH:
                log("term %s is rare enough (%s) to use and break early." % (candidate, popularity))
                break
    return winner
def disjunctify(q):
    """Turn query ``q`` into a pure disjunction of its tokenized terms.

    Tokens are produced by ``tu.tokenize``. The connective tokens ``OR`` and
    ``AND`` are removed; each remaining token is parenthesized and the
    results are joined with ``" OR "``.

    Example carried over from the original comment:

        'from="david carasso" to=amrit OR to=deep delay>4 AND delay<10 NOT subject=*erik* (rotsky OR rosensteel)'
        =====>
        '(from="david carasso") OR (to=amrit) OR (to=deep) OR (delay>4) OR (delay<10) OR (NOT subject=*erik*) OR ((rotsky OR rosensteel))'
    """
    # NOTE(review): a function with this name appears more than once in the
    # visible source; confirm which definition is meant to be kept.
    connectives = ('OR', 'AND')
    kept_terms = (token for token in tu.tokenize(q) if token not in connectives)
    return " OR ".join("(%s)" % token for token in kept_terms)