def chunk(self, bookId, start, end):
    """Return the tokens of one book within a range of positions.

    Selects rows of ``word_index`` for ``bookId`` whose ``position`` lies
    between ``start`` and ``end`` (SQL ``BETWEEN``, so both bounds are
    inclusive) and whose ``raw`` column is not NULL, ordered by position.

    Args:
        bookId: value matched against the ``book_id`` column.
        start: lowest position to include.
        end: highest position to include.

    Returns:
        A list with one ``flaf_types.readToken(row)`` result per row;
        empty when nothing matches.
    """
    # Values are passed as bound parameters rather than interpolated into
    # the SQL text, so the driver handles quoting and escaping.
    # NOTE(review): the placeholders assume the DB-API "format" paramstyle
    # (e.g. MySQLdb / PyMySQL) -- confirm against the driver in use.
    # NOTE(review): the column order here (book_id first) differs from the
    # one used by getContexts (position first) although both feed
    # flaf_types.readToken -- verify readToken copes with both layouts.
    self.cursor.execute(
        'SELECT book_id, position, word, raw FROM word_index '
        'WHERE position BETWEEN %s AND %s '
        'AND raw IS NOT NULL '
        'AND book_id=%s '
        'ORDER BY position',
        (start, end, bookId))
    return [flaf_types.readToken(row) for row in self.cursor.fetchall()]
def getContexts(self, requests):
    """Build one context (a token plus its neighbours) per request.

    Each request is a dict with the keys ``bookId``, ``position``,
    ``numWordsBefore`` and ``numWordsAfter``. All requested windows are
    fetched from ``word_index`` with a single query.

    Args:
        requests: list of request dicts as described above.

    Returns:
        A list parallel to ``requests``. Each entry is a dict with the keys
        ``bookId``, ``token`` (the token at the requested position),
        ``before`` and ``after`` (neighbouring tokens in ascending position
        order). Neighbour positions that were not returned by the query are
        skipped. Returns ``[]`` when ``requests`` is empty.

    Raises:
        KeyError: if no row was returned for a request's book, or for the
            requested position itself.
    """
    if not requests:
        return []

    # One placeholder group per request; the values travel separately as
    # bound parameters so the driver handles quoting and escaping.
    # NOTE(review): the placeholders assume the DB-API "format" paramstyle
    # (e.g. MySQLdb / PyMySQL) -- confirm against the driver in use.
    clauses = []
    params = []
    for request in requests:
        clauses.append('(position BETWEEN %s AND %s AND book_id=%s)')
        params.extend((
            request['position'] - request['numWordsBefore'],
            # The upper bound reaches one position past the last "after"
            # word; that extra row is not used below.
            request['position'] + request['numWordsAfter'] + 1,
            request['bookId'],
        ))
    whereClause = ' OR '.join(clauses)
    self.cursor.execute(
        'SELECT position, word, raw, book_id FROM word_index '
        + 'WHERE (' + whereClause + ') '
        + 'ORDER BY position',
        params)

    # Build a map: book id -> position -> token.
    tokenMap = {}
    for row in self.cursor.fetchall():
        token = flaf_types.readToken(row)
        tokenMap.setdefault(row[3], {})[token['position']] = token

    # Build one context object per request from the map.
    contexts = []
    for request in requests:
        tokenMapForBook = tokenMap[request['bookId']]
        position = request['position']
        firstBefore = position - request['numWordsBefore']
        lastAfter = position + request['numWordsAfter']
        contexts.append({
            'bookId': request['bookId'],
            'token': tokenMapForBook[position],
            # Positions <= 0 are never included; positions missing from
            # the result set are skipped instead of raising KeyError, the
            # same way the "after" side has always behaved.
            'before': [
                tokenMapForBook[p]
                for p in range(firstBefore, position)
                if p > 0 and p in tokenMapForBook
            ],
            'after': [
                tokenMapForBook[p]
                for p in range(position + 1, lastAfter + 1)
                if p in tokenMapForBook
            ],
        })
    return contexts