コード例 #1
0
def test_value_stats():
    """Simple test of being able to get value statistics.

    Ten documents are added, each carrying one sortable-serialised number in
    value slot 1.  Slot 1 must then report a frequency of 10 with the
    serialised forms of 0 and 9 as bounds, while the unused slots 0 and 2
    must report a frequency of 0 and empty bounds.

    """
    dbpath = 'db_test_value_stats'
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    try:
        # Iterate over the values directly instead of indexing with a loop
        # variable that shadows the builtin `id`.
        for val in (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0):
            doc = xapian.Document()
            doc.add_value(1, xapian.sortable_serialise(val))
            db.add_document(doc)

        expect(db.get_value_freq(0), 0)
        expect(db.get_value_lower_bound(0), "")
        expect(db.get_value_upper_bound(0), "")
        expect(db.get_value_freq(1), 10)
        expect(db.get_value_lower_bound(1), xapian.sortable_serialise(0))
        expect(db.get_value_upper_bound(1), xapian.sortable_serialise(9))
        expect(db.get_value_freq(2), 0)
        expect(db.get_value_lower_bound(2), "")
        expect(db.get_value_upper_bound(2), "")
    finally:
        # Release the database and remove its directory even when one of the
        # expectations above fails, so no stale test database is left behind.
        db.close()
        shutil.rmtree(dbpath)
コード例 #2
0
ファイル: fieldmap.py プロジェクト: anhnguyendepocen/flaxcode
    def range_query(self, fieldname, value1, value2):
        """Construct a xapian.Query object representing a value range.

        `fieldname` is the field to search.
        `value1` and `value2` define the range, inclusively.

        The values must be of the same type (int, float or datetime).
        Datetimes are turned into timestamps with time.mktime() before being
        serialised.  Helper terms for datetime ranges are not generated yet
        (see the FIXME below).

        Raises SearchError if the two values have different types or if
        `fieldname` is not in the fieldmap.  Returns None when the values are
        of any type other than int, float or datetime.

        """
        if type(value1) is not type(value2):
            raise SearchError('cannot mix types in a query range')

        try:
            _prefix, valnum, _isfilter = self._fieldmap[fieldname]
        except KeyError:
            raise SearchError('fieldname %s not in fieldmap' % fieldname)

        if isinstance(value1, (int, float)):
            low, high = value1, value2
        elif isinstance(value1, datetime):
            # FIXME - helper terms?
            low = time.mktime(value1.timetuple())
            high = time.mktime(value2.timetuple())
        else:
            # Unsupported value type: no query can be built.
            return None

        return xapian.Query(xapian.Query.OP_VALUE_RANGE, valnum,
                            xapian.sortable_serialise(low),
                            xapian.sortable_serialise(high))
コード例 #3
0
    def indexDeb822(self, document, pkg):
        """
        Update the document with the information from this data source.

        This is alternative to index, and it is used when indexing with package
        data taken from a custom Packages file.

        document  is the document to update
        pkg       is the Deb822 object for this package

        The "Installed-Size" and "Size" fields are stored as sortable values
        in the slots self.val_inst_size and self.val_pkg_size; a slot number
        of -1 disables the corresponding value.  Nothing is added when either
        field is missing or cannot be converted to an integer.
        """
        try:
            instSize = int(pkg["Installed-Size"])
            pkgSize = int(pkg["Size"])
        except Exception:
            # Best effort: skip packages without usable size information, but
            # let KeyboardInterrupt and SystemExit propagate.
            return

        for slot, size in ((self.val_inst_size, instSize),
                           (self.val_pkg_size, pkgSize)):
            if slot == -1:
                continue
            try:
                document.add_value(slot, xapian.sortable_serialise(size))
            except (OverflowError, SystemError):
                # Sizes that cannot be serialised are silently left out.
                pass
コード例 #4
0
    def index_document(self, document, properties):
        """Store the sortable values of `properties` and index its terms.

        The timestamp (required) and the stripped title are always stored.
        'filesize' and 'creation_time' are stored only when present and
        convertible to a number; invalid values are logged at debug level
        and skipped.  Afterwards the document is set as the current one and a
        copy of the properties is handed to the known/unknown indexers.
        """
        timestamp = float(properties['timestamp'])
        document.add_value(_VALUE_TIMESTAMP,
                           xapian.sortable_serialise(timestamp))

        title = properties.get('title', '').strip()
        document.add_value(_VALUE_TITLE, title)

        if 'filesize' in properties:
            try:
                filesize = int(properties['filesize'])
                document.add_value(_VALUE_FILESIZE,
                                   xapian.sortable_serialise(filesize))
            except (ValueError, TypeError):
                logging.debug('Invalid value for filesize property: %s',
                              properties['filesize'])

        if 'creation_time' in properties:
            try:
                creation_time = float(properties['creation_time'])
                document.add_value(_VALUE_CREATION_TIME,
                                   xapian.sortable_serialise(creation_time))
            except (ValueError, TypeError):
                logging.debug('Invalid value for creation_time property: %s',
                              properties['creation_time'])

        self.set_document(document)

        # The indexers receive their own dict rather than the caller's object.
        property_map = dict(properties)
        self._index_known(document, property_map)
        self._index_unknown(document, property_map)
コード例 #5
0
ファイル: pythontest2.py プロジェクト: wangeguo/xapian
def test_value_stats():
    """Simple test of being able to get value statistics.

    Ten documents are added to a chert database, each carrying one
    sortable-serialised number in value slot 1.  Slot 1 must then report a
    frequency of 10 with the serialised forms of 0 and 9 as bounds, while the
    unused slots 0 and 2 must report a frequency of 0 and empty bounds.

    """
    dbpath = "db_test_value_stats"
    db = xapian.chert_open(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    try:
        # Iterate over the values directly instead of indexing with a loop
        # variable that shadows the builtin `id`.
        for val in (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0):
            doc = xapian.Document()
            doc.add_value(1, xapian.sortable_serialise(val))
            db.add_document(doc)

        expect(db.get_value_freq(0), 0)
        expect(db.get_value_lower_bound(0), "")
        expect(db.get_value_upper_bound(0), "")
        expect(db.get_value_freq(1), 10)
        expect(db.get_value_lower_bound(1), xapian.sortable_serialise(0))
        expect(db.get_value_upper_bound(1), xapian.sortable_serialise(9))
        expect(db.get_value_freq(2), 0)
        expect(db.get_value_lower_bound(2), "")
        expect(db.get_value_upper_bound(2), "")
    finally:
        # Release the database and remove its directory even when one of the
        # expectations above fails, so no stale test database is left behind.
        db.close()
        shutil.rmtree(dbpath)
コード例 #6
0
    def index(self, document, pkg):
        """
        Update the document with the information from this data source.

        document  is the document to update
        pkg       is the python-apt Package object for this package

        The installed size and download size of the candidate version are
        stored as sortable values in the slots self.val_inst_size and
        self.val_pkg_size; a slot number of -1 disables the corresponding
        value.  Nothing is added when the package has no candidate version or
        its sizes cannot be read.
        """
        ver = pkg.candidate
        if ver is None:
            return
        try:
            instSize = ver.installed_size
            pkgSize = ver.size
        except Exception:
            # Best effort: skip versions whose sizes cannot be read, but let
            # KeyboardInterrupt and SystemExit propagate.
            return

        for slot, size in ((self.val_inst_size, instSize),
                           (self.val_pkg_size, pkgSize)):
            if slot == -1:
                continue
            try:
                document.add_value(slot, xapian.sortable_serialise(size))
            except (OverflowError, SystemError):
                # Sizes that cannot be serialised are silently left out.
                pass
コード例 #7
0
 def __call__(self, doc):
     """Return the sortable-serialised dampened rating for `doc`.

     The application is identified by the app and package names the database
     reports for `doc`.  When the review loader has no stats for it, the
     serialised form of 0 is returned.
     """
     appname = self.db.get_appname(doc)
     pkgname = self.db.get_pkgname(doc)
     stats = self.review_loader.get_review_stats(Application(appname, pkgname))
     import xapian
     rating = stats.dampened_rating if stats else 0
     return xapian.sortable_serialise(rating)
コード例 #8
0
    def index_document(self, document, properties):
        """Add the sortable values for `properties` and index the document.

        Timestamp and title are stored unconditionally (the timestamp must be
        present and numeric).  The optional 'filesize' and 'creation_time'
        properties are stored when they convert cleanly; otherwise the
        offending value is reported through logging.debug and ignored.
        """
        serialise = xapian.sortable_serialise

        document.add_value(_VALUE_TIMESTAMP,
                           serialise(float(properties['timestamp'])))
        document.add_value(_VALUE_TITLE, properties.get('title', '').strip())

        if 'filesize' in properties:
            try:
                size = int(properties['filesize'])
                document.add_value(_VALUE_FILESIZE, serialise(size))
            except (ValueError, TypeError):
                logging.debug('Invalid value for filesize property: %s',
                              properties['filesize'])

        if 'creation_time' in properties:
            try:
                created = float(properties['creation_time'])
                document.add_value(_VALUE_CREATION_TIME, serialise(created))
            except (ValueError, TypeError):
                logging.debug('Invalid value for creation_time property: %s',
                              properties['creation_time'])

        self.set_document(document)

        # Hand a plain dict copy to the term indexers.
        props = dict(properties)
        self._index_known(document, props)
        self._index_unknown(document, props)
コード例 #9
0
ファイル: xapian_backend.py プロジェクト: rob-b/Grab
 def __call__(self, begin, end):
     """
     Construct a tuple for value range processing.

     `begin` -- a string in the format '<field_name>:[low_range]'
                If 'low_range' is omitted, assume the smallest possible value.
     `end` -- a string in the format '[high_range|*]'.  If '*', assume
              the highest possible value.

     Return a tuple (column, low, high) where `column` is the schema's
     'column' entry for the field and `low`/`high` are strings; numeric
     ('long'/'float') bounds are sortable-serialised.  Returns None when no
     schema entry matches the field name.
     """
     colon = begin.find(':')
     field_name = begin[:colon]
     begin = begin[colon + 1:]
     for field_dict in self.sb.schema:
         if field_dict['field_name'] != field_name:
             continue

         field_type = field_dict['type']
         is_numeric = field_type in ('long', 'float')
         is_date = field_type in ('date', 'datetime')

         if not begin:
             if field_type == 'text':
                 begin = u'a' # TODO: A better way of getting a min text value?
             elif is_numeric:
                 begin = float('-inf')
             elif is_date:
                 begin = u'00010101000000'
         # The open upper bound must be resolved independently of the lower
         # one: with `elif`, a fully open range ('field:' .. '*') kept the
         # literal '*' and float('*') failed for numeric fields.
         if end == '*':
             if field_type == 'text':
                 end = u'z' * 100 # TODO: A better way of getting a max text value?
             elif is_numeric:
                 end = float('inf')
             elif is_date:
                 end = u'99990101000000'

         if is_numeric:
             begin = xapian.sortable_serialise(float(begin))
             end = xapian.sortable_serialise(float(end))
         return field_dict['column'], str(begin), str(end)
コード例 #10
0
ファイル: database.py プロジェクト: Alberto-Beralix/Beralix
 def __call__(self, doc):
     """Return the sort key for `doc`: its serialised dampened rating.

     The review stats are looked up for the application named by the
     database for `doc`; without stats the serialised form of 0 is used.
     """
     app = Application(self.db.get_appname(doc), self.db.get_pkgname(doc))
     stats = self.review_loader.get_review_stats(app)
     import xapian
     if not stats:
         return xapian.sortable_serialise(0)
     return xapian.sortable_serialise(stats.dampened_rating)
コード例 #11
0
def get_msg_terms(db=None, msg=None):
    """Collect the document data, index terms and sortable values for `msg`.

    Terms are the Finnish-stemmed, non-stopword words of the message content,
    a "_commented-by_" author term and, when the message has a date, a "_c_"
    term built from the first seven characters of str(msg.date).  If the
    message has places, only the first one is used: its place terms, the
    stemmed words of its address and its latitude/longitude range terms are
    added, its address is appended to the document data and its coordinates
    are stored as sortable values.

    Returns a dict with the keys "doc_data", "doc_terms" and "doc_values";
    "doc_values" is a list of {"field": ..., "value": ...} dicts.
    """
    # This is pretty important: what data to be shown from the thing?
    # Maybe should be parsed into json already? Or serialise a hash somehow?
    doc_data = msg.content
    doc_values = []
    doc_terms = []

    stemmer = xapian.Stem("finnish")
    # NOTE(review): with a byte-string pattern the accented letters are
    # multi-byte sequences inside the character class - confirm the intended
    # source/text encoding.
    word_pattern = r'\b[a-zA-ZäöåüÄÖÅÜÉÈÁÀéèáà]{3,35}\b'

    def _stemmed_words(text):
        """Return the stems of all non-stopword words found in `text`."""
        stems = []
        for match in re.finditer(word_pattern, to_lower_case(text)):
            word = match.group(0)
            if not is_stopword(word):
                stems.append(stemmer(word))
        return stems

    doc_terms.extend(_stemmed_words(msg.content))
    doc_terms.append("_commented-by_" + msg.author)

    if msg.date:
        doc_terms.append("_c_" + str(msg.date)[:7])

    if msg.places:
        place = db.place.getnode(msg.places[0])
        doc_terms.extend(get_place_terms(place=place))
        doc_terms.extend(_stemmed_words(place.address))

        doc_data += "  " + place.address

        for term in get_latlng_range(place.lat):
            doc_terms.append("_glatrange_" + term)
        for term in get_latlng_range(place.lng):
            doc_terms.append("_glngrange_" + term)

        doc_values.append({"field": XAPIAN_X_COORD_FIELD,
                           "value": xapian.sortable_serialise(float(place.lat))})
        doc_values.append({"field": XAPIAN_Y_COORD_FIELD,
                           "value": xapian.sortable_serialise(float(place.lng))})

    if msg.date:
        doc_values.append({"field": XAPIAN_CREATED_FIELD,
                           "value": xapian.sortable_serialise(
                               float(msg.date.serialise()))})

    return {"doc_data": doc_data,
            "doc_terms": doc_terms,
            "doc_values": doc_values}
コード例 #12
0
def index(datapath, dbpath):
    """Index every record yielded by parse_states(datapath) into `dbpath`.

    Name, description and motto are indexed both with a field prefix and
    unprefixed.  Value slots: 1 = admission year, 2 = admission date string,
    3 = population, 4 = "lat,lon" of the middle point.  Each document is
    keyed by the boolean term "Q" + order, so re-running replaces documents
    instead of duplicating them.
    """
    # Open the target database for writing, creating it when necessary.
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OPEN)

    # A single term generator with an English stemmer serves all documents.
    termgenerator = xapian.TermGenerator()
    termgenerator.set_stemmer(xapian.Stem("en"))

    for fields in parse_states(datapath):
        # 'fields' maps field names to values; pull out what gets indexed.
        name = fields.get('name', u'')
        description = fields.get('description', u'')
        motto = fields.get('motto', u'')
        admitted = fields.get('admitted', None)
        population = fields.get('population', None)

        # Fresh document, attached to the term generator.
        doc = xapian.Document()
        termgenerator.set_document(doc)

        # Field-specific (prefixed) terms.
        termgenerator.index_text(name, 1, 'S')
        termgenerator.index_text(description, 1, 'XD')
        termgenerator.index_text(motto, 1, 'XM')

        # Unprefixed terms for general search; the position gap keeps
        # phrases from matching across field boundaries.
        termgenerator.index_text(name)
        for text in (description, motto):
            termgenerator.increase_termpos()
            termgenerator.index_text(text)

        # Document values.
        if admitted is not None:
            admission_year = int(admitted[:4])
            doc.add_value(1, xapian.sortable_serialise(admission_year))
            doc.add_value(2, admitted)  # YYYYMMDD
        if population is not None:
            doc.add_value(3, xapian.sortable_serialise(int(population)))
### Start of example code.
        midlat = fields['midlat']
        midlon = fields['midlon']
        if midlat and midlon:
            doc.add_value(4, "%f,%f" % (float(midlat), float(midlon)))
### End of example code.

        # Keep the complete record as document data for display purposes.
        doc.set_data(json.dumps(fields))

        # The order field identifies the record, so each object ends up in
        # the database only once however often the indexer is run.
        idterm = u"Q" + fields.get('order', u'')
        doc.add_boolean_term(idterm)
        db.replace_document(idterm, doc)
コード例 #13
0
def index(datapath, dbpath):
    """Build or update the Xapian database at `dbpath` from `datapath`.

    Records come from parse_states(datapath).  Name, description and motto
    are indexed with and without field prefixes.  Value slots: 1 = admission
    year, 2 = admission date string, 3 = population, 4 = "lat,lon" of the
    middle point.  Documents are replaced by their "Q" + order term.
    """
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OPEN)

    termgenerator = xapian.TermGenerator()
    termgenerator.set_stemmer(xapian.Stem("en"))

    # Prefix used for each text field when indexing it field-specifically.
    prefixed_fields = (('name', 'S'), ('description', 'XD'), ('motto', 'XM'))

    for fields in parse_states(datapath):
        # 'fields' is a dictionary mapping from field name to value.
        texts = [fields.get(key, u'') for key, _ in prefixed_fields]
        admitted = fields.get('admitted', None)
        population = fields.get('population', None)
        order = fields.get('order', u'')

        doc = xapian.Document()
        termgenerator.set_document(doc)

        # First pass: prefixed terms, one prefix per field.
        for text, (_, prefix) in zip(texts, prefixed_fields):
            termgenerator.index_text(text, 1, prefix)

        # Second pass: unprefixed terms, separated by a position gap.
        termgenerator.index_text(texts[0])
        for text in texts[1:]:
            termgenerator.increase_termpos()
            termgenerator.index_text(text)

        # Add document values.
        if admitted is not None:
            year = int(admitted[:4])
            doc.add_value(1, xapian.sortable_serialise(year))
            doc.add_value(2, admitted) # YYYYMMDD
        if population is not None:
            doc.add_value(3, xapian.sortable_serialise(population))
### Start of example code.
        midlat = fields['midlat']
        midlon = fields['midlon']
        if midlat and midlon:
            doc.add_value(4, "%f,%f" % (midlat, midlon))
### End of example code.

        # The whole record is stored as document data for display.
        doc.set_data(json.dumps(fields))

        # Replacing by the unique id term keeps the database free of
        # duplicates across repeated indexer runs.
        idterm = u"Q" + order
        doc.add_boolean_term(idterm)
        db.replace_document(idterm, doc)
コード例 #14
0
def create_index(filename, databasePath):
    """Index the tab-separated lines of `filename` into a Xapian database.

    For every line, column 0 becomes the document data and the source of the
    (stemmed, length-limited) terms, column 1 is stored in the DATE slot and
    columns 3 and 4 are stored sortable-serialised in the FREQ and CLICK
    slots.  Lines that fail the UTF-8 encoding step are skipped.  Progress is
    printed every 100000 indexed lines and the input file is deleted once
    indexing has finished.

    The database directory is created when it does not exist yet.
    """
    progress_step = 100000

    print("begin read %s" % filename)
    if not os.path.exists(databasePath):
        os.makedirs(databasePath)
    database = xapian.WritableDatabase(databasePath, xapian.DB_CREATE_OR_OPEN)
    stemmer = xapian.Stem('english')
    rex = re.compile(r'[0-9]+|[a-zA-Z]+|[\x80-\xff3]{3}')

    # All lines are read up front because the total count is needed for the
    # progress report; the file is closed as soon as it has been read.
    with open(filename) as source:
        lines = source.readlines()
    processed = 0
    len_file = len(lines)
    print("%s read end" % filename)

    time_begin = time.time()
    for line in lines:
        try:
            line = line.encode('utf-8')
        except UnicodeError:
            continue
        line_items = line.split('\t')

        document = xapian.Document()
        document.add_value(FREQ, xapian.sortable_serialise(float(line_items[3])))
        document.add_value(CLICK, xapian.sortable_serialise(float(line_items[4])))
        document.add_value(DATE, line_items[1])
        document.set_data(line_items[0])
        for term in rex.findall(line_items[0]):
            # Slicing leaves terms within the limit untouched.
            document.add_term(stemmer(term[:MAX_TERM_LENGTH]))
        database.add_document(document)
        processed += 1

        if processed % progress_step == 0:
            end = time.time()
            speed = progress_step / float(end - time_begin)
            print("=" * 40)
            print(filename)
            print("speed:\t %s" % speed)
            print("percent:\t%s %%" % (100.0 * (processed / float(len_file))))
            print("time remain:\t %s hours" % ((len_file - processed) / (speed * 3600)))
            time_begin = time.time()

    gc.collect()
    # Remove the consumed input file directly instead of interpolating its
    # name into a shell command; failures are ignored as `rm -f` would.
    try:
        os.remove(filename)
    except OSError:
        pass
    print("%s end" % filename)
コード例 #15
0
ファイル: xapian_music.py プロジェクト: albins/music-tools
def make_value(s, term):
    """Convert the string `s` into a sortable Xapian value for `term`.

    'mtime' strings are parsed with time.strptime (default format) and turned
    into a timestamp, 'rating' strings are read as floats, and 'year' as well
    as every other term is read as an integer.
    """
    if term == 'mtime':
        number = time.mktime(time.strptime(s))
    elif term == 'rating':
        number = float(s)
    else:
        # 'year' and all remaining terms hold plain integers.
        number = int(s)
    return xapian.sortable_serialise(number)
コード例 #16
0
def index(datapath, dbpath):
    """Index the rows of the CSV file `datapath` into the database `dbpath`.

    Title and description are indexed with and without field prefixes and
    the whole row is stored as JSON document data.  Value slot 0 receives the
    largest number found in MEASUREMENTS and slot 1 the first number found in
    DATE_MADE, both sortable-serialised.  Documents are replaced by their
    "Q" + id_NUMBER term.
    """
    # Open the target database for writing, creating it when necessary.
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OPEN)

    # A single term generator with an English stemmer serves all documents.
    termgenerator = xapian.TermGenerator()
    termgenerator.set_stemmer(xapian.Stem("en"))

    for fields in parse_csv_file(datapath):
        # 'fields' maps field names to values; pull out what gets indexed.
        title = fields.get('TITLE', u'')
        description = fields.get('DESCRIPTION', u'')

        doc = xapian.Document()
        termgenerator.set_document(doc)

        # Field-specific (prefixed) terms.
        termgenerator.index_text(title, 1, 'S')
        termgenerator.index_text(description, 1, 'XD')

        # Unprefixed terms for general search, with a position gap between
        # the two fields.
        termgenerator.index_text(title)
        termgenerator.increase_termpos()
        termgenerator.index_text(description)

        # Keep the complete row as document data for display purposes.
        doc.set_data(json.dumps(fields, encoding='utf8'))

### Start of example code.
        # parse the two values we need
        measurements = fields.get('MEASUREMENTS', u'')
        if measurements != u'':
            numbers = numbers_from_string(measurements)
            if len(numbers) > 0:
                largest = max(numbers)
                doc.add_value(0, xapian.sortable_serialise(largest))

        years = numbers_from_string(fields.get('DATE_MADE', u''))
        if len(years) > 0:
            doc.add_value(1, xapian.sortable_serialise(years[0]))
### End of example code.

        # The identifier keys the document, so each object ends up in the
        # database only once however often the indexer is run.
        idterm = u"Q" + fields.get('id_NUMBER', u'')
        doc.add_boolean_term(idterm)
        db.replace_document(idterm, doc)
コード例 #17
0
def index(datapath, dbpath):
    """Build or update the Xapian database at `dbpath` from a CSV file.

    Rows come from parse_csv_file(datapath).  TITLE and DESCRIPTION are
    indexed with the prefixes 'S' and 'XD' and again unprefixed; the row is
    stored as JSON document data.  Slot 0 holds the maximum number parsed
    from MEASUREMENTS, slot 1 the first number parsed from DATE_MADE.
    """
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OPEN)

    termgenerator = xapian.TermGenerator()
    termgenerator.set_stemmer(xapian.Stem("en"))

    for fields in parse_csv_file(datapath):
        # 'fields' is a dictionary mapping from field name to value.
        description = fields.get('DESCRIPTION', u'')
        title = fields.get('TITLE', u'')
        identifier = fields.get('id_NUMBER', u'')

        doc = xapian.Document()
        termgenerator.set_document(doc)

        # Prefixed terms first, in title/description order.
        for text, prefix in ((title, 'S'), (description, 'XD')):
            termgenerator.index_text(text, 1, prefix)

        # Then the same texts unprefixed, separated by a position gap.
        termgenerator.index_text(title)
        termgenerator.increase_termpos()
        termgenerator.index_text(description)

        # The whole row is stored for display.
        serialised_fields = json.dumps(fields, encoding='utf8')
        doc.set_data(serialised_fields)

### Start of example code.
        # parse the two values we need
        measurements = fields.get('MEASUREMENTS', u'')
        if measurements != u'':
            numbers = numbers_from_string(measurements)
            if len(numbers) > 0:
                doc.add_value(0, xapian.sortable_serialise(max(numbers)))

        date_made = fields.get('DATE_MADE', u'')
        years = numbers_from_string(date_made)
        if len(years) > 0:
            first_year = years[0]
            doc.add_value(1, xapian.sortable_serialise(first_year))
### End of example code.

        # Replacing by the unique id term keeps the database free of
        # duplicates across repeated indexer runs.
        idterm = u"Q" + identifier
        doc.add_boolean_term(idterm)
        db.replace_document(idterm, doc)
コード例 #18
0
ファイル: catalog.py プロジェクト: hforge/itools
def _encode_simple_value(field_cls, value):
    """Encode `value` for storage according to its field class.

    Integer fields are sortable-serialised directly and Decimal fields after
    conversion to float.  Any other field is encoded by `field_cls.encode`;
    plain datetime values are moved to UTC first.
    """
    # Integers (FIXME this doesn't work with the big integers)
    if issubclass(field_cls, Integer):
        return sortable_serialise(value)

    # FIXME: We convert decimal->float so we lost precision
    if issubclass(field_cls, Decimal):
        as_float = float(value)
        return sortable_serialise(as_float)

    # Datetimes: normalize to UTC, so searching works
    if type(value) is datetime:
        utc = fixed_offset(0)
        value = value.astimezone(utc)

    # A common field or a new field
    return field_cls.encode(value)
コード例 #19
0
ファイル: jot.py プロジェクト: ttaylordev/z
def dict2doc(y):
    """Build a xapian.Document from the bookmark dict `y`.

    The URL id (full and 8-character short form) is added as boolean terms,
    the hostname suffixes as site terms and every tag as a tag term.  URL,
    tags (joined with U+001F), title, creation time and - when an archive
    exists - archive time are stored as values; title and notes are indexed
    as text and the notes become the document data.

    Note that `y['tags']` is updated in place: 'archived' is appended when an
    archive path exists and duplicates are removed, preserving order.
    """
    doc = xapian.Document()
    indexer.set_document(doc)

    url = y['url']
    uid = urlid(url)
    sid = uid[:8]
    doc.add_boolean_term(P['id'] + uid)
    # The id and short id also go in as unprefixed/stemmed terms so that
    # bookmarks are easy to select from search results.
    for idterm in (uid, sid, 'Z' + uid, 'Z' + sid):
        doc.add_boolean_term(idterm)

    doc.add_value(VALUE_URL, url)

    # Every suffix of the hostname becomes a site term.
    hostname = urlparse(url).hostname
    if hostname:
        labels = hostname.split('.')
        for start, _ in enumerate(labels):
            doc.add_boolean_term(P['site'] + '.'.join(labels[start:]))

    archive_path = get_archive_path(uid)
    if archive_path:
        y['tags'].append('archived')

    # Drop duplicate tags while keeping their first-seen order.
    y['tags'] = list(OrderedDict.fromkeys(y['tags']))
    tags = y['tags']
    doc.add_value(VALUE_TAGS, u'\x1f'.join(tags))
    for tag in tags:
        doc.add_boolean_term(P['tag'] + tag)

    if 'title' in y:
        title = y['title']
        doc.add_value(VALUE_TITLE, title)
        index_text(title, 'title')

    if 'notes' in y:
        notes = y['notes']
        doc.set_data(notes)
        index_text(notes, 'notes')

    created = y.get('created', arrow.utcnow()).timestamp
    doc.add_value(VALUE_CREATED, xapian.sortable_serialise(created))

    if archive_path:
        archived = y.get('archived', arrow.utcnow()).timestamp
        doc.add_value(VALUE_ARCHIVED, xapian.sortable_serialise(archived))
        index_archive(doc, archive_path)

    return doc
コード例 #20
0
ファイル: catalog.py プロジェクト: nkhine/itools
def _encode_simple_value(field_cls, value):
    """Return the stored representation of `value` for `field_cls`.

    Integer and Decimal fields yield a sortable-serialised number (Decimals
    go through float).  All other fields use `field_cls.encode`, with plain
    datetime values converted to UTC beforehand.
    """
    is_integer = issubclass(field_cls, Integer)
    if is_integer or issubclass(field_cls, Decimal):
        # Integers: FIXME this doesn't work with the big integers.
        # Decimals: FIXME we convert decimal->float so we lose precision.
        number = value if is_integer else float(value)
        return sortable_serialise(number)

    # Datetimes: normalize to UTC, so searching works
    if type(value) is datetime:
        value = value.astimezone(fixed_offset(0))

    # A common field or a new field
    return field_cls.encode(value)
コード例 #21
0
ファイル: query.py プロジェクト: everydo/zapian
def normalize_range(begin, end):
    """Convert the bounds of a range query to their string form.

    A float bound is sortable-serialised, any other non-None bound is passed
    through str(), and None is kept as None.  Returns the converted
    (begin, end) pair.
    """
    def _as_string(bound):
        """Convert a single bound; None stays None."""
        if bound is None:
            return None
        if isinstance(bound, float):
            return xapian.sortable_serialise(float(bound))
        return str(bound)

    return _as_string(begin), _as_string(end)
コード例 #22
0
ファイル: fieldactions.py プロジェクト: PaulRudin/xappy
def _act_weight(fieldname, doc, field, context, type=None):
    """Perform the WEIGHT action.

    The field's value is converted to a float, sortable-serialised and added
    to `doc` under `fieldname` with the purpose 'weight'.  `context` and
    `type` are not used here.

    """
    serialised = xapian.sortable_serialise(float(field.value))
    doc.add_value(fieldname, serialised, 'weight')
コード例 #23
0
    def add_product(self, product, database_path=None):
        """Adds product to repository.

        product - Product to be added to database
        database_path - Path of the database where product is added. Default: None
        When repository has been created with many database paths then database_path must
        be defined.

        The product's title and description are indexed as text, its
        attribute dict is stored as JSON document data and its price goes
        into value slot 0.  The document is keyed by "Q" + product.url, so
        adding the same URL again replaces the earlier document.
        """
        if len(self._databases) > 1:
            # None is a singleton: test identity, not equality.
            assert database_path is not None, \
                "With many databases you must identify the database where product is added"

        # Set up a TermGenerator that we'll use in indexing.
        termgenerator = xapian.TermGenerator()
        termgenerator.set_stemmer(self._create_stem())

        # We make a document and tell the term generator to use this.
        doc = xapian.Document()
        termgenerator.set_document(doc)
        termgenerator.index_text(unicode(product.title))
        termgenerator.index_text(unicode(product.description))
        doc.set_data(unicode(json.dumps(product.__dict__)))
        doc.add_value(0, xapian.sortable_serialise(float(product.price)))

        idterm = "Q" + product.url
        doc.add_boolean_term(idterm)

        # Use the default database unless a specific one was requested.
        db = self._db
        if database_path:
            db = self._databases[database_path]

        db.replace_document(idterm, doc)
コード例 #24
0
    def index(self, document, fname, entry):
        """Add the terms and values of one .desktop entry to `document`.

        Terms: "XD" (package ships .desktop files), "XDF" + file name and
        "XDT" + category for every category; name, generic name and comment
        are indexed as text while Locale.langs is temporarily switched to
        this indexer's languages.  The popcon rank is stored in the
        self.val_popcon slot unless that slot is -1; an unparsable rank is
        reported through self.progress (when set) and stored as -1.
        """
        # Marker term plus the name of the .desktop file itself.
        document.add_term("XD")
        document.add_term("XDF" + fname)

        # Index keywords retrieved in this indexer's language; the previous
        # language list is restored whatever happens.
        self.indexer.set_document(document)
        oldlangs = Locale.langs
        try:
            Locale.langs = self.xdglangs
            for getter in (entry.getName, entry.getGenericName,
                           entry.getComment):
                self.indexer.index_text_without_positions(getter())
        finally:
            Locale.langs = oldlangs

        # .desktop categories, with prefix XDT
        for cat in entry.getCategories():
            document.add_term("XDT" + cat)

        # "app-popcon" value with the popcon rank
        try:
            popcon = int(entry.get("X-AppInstall-Popcon"))
        except ValueError as e:
            popcon = -1
            if self.progress:
                self.progress.verbose("%s: parsing X-AppInstall-Popcon: %s" %
                                      (fname, str(e)))
        if self.val_popcon == -1:
            return
        document.add_value(self.val_popcon, xapian.sortable_serialise(popcon))
コード例 #25
0
ファイル: api.py プロジェクト: everydo/zapian
        def _add_value(doc, slotnum, value):
            """Store `value` in slot `slotnum` of `doc`.

            Floats are sortable-serialised; everything else is stored as
            str(value).  The slot number is converted with int().
            """
            if isinstance(value, float):
                stored = xapian.sortable_serialise(float(value))
            else:
                stored = str(value)
            doc.add_value(int(slotnum), stored)
コード例 #26
0
def _marshal_value(value):
    """
    Private utility that prepares a Python value for storage as a Xapian value.

    Integers (int and long) are sortable-serialised; every other value is
    returned unchanged.
    """
    if not isinstance(value, (int, long)):
        return value
    return xapian.sortable_serialise(value)
コード例 #27
0
ファイル: indexer.py プロジェクト: pombredanne/mua
    def add(self, msg):
        """Index the mail `msg` and return the result of db.add_document().

        The sender (address plus optional name), the body and the subject
        are indexed under their own prefixes; sender and subject are also
        appended to the body text.  Returns None without indexing when no
        body could be extracted.
        """
        frombits = msg.from_addr() + ' '
        if msg.from_name():
            frombits += msg.from_name()

        with msg.open() as fp:
            bodybits = self._get_body(email.message_from_file(fp))

        if bodybits is None:
            return

        # The body text also carries the sender and the subject.
        bodybits += u' ' + frombits
        bodybits += u' ' + (msg.subject() or u'')

        doc = xapian.Document()
        self.term_gen.set_document(doc)

        # presumably unix_dt yields a numeric timestamp - verify against mua.py
        timestamp = mua.py.unix_dt(msg.date())
        doc.add_value(SLOT_DATE, xapian.sortable_serialise(timestamp))
        doc.add_value(SLOT_STRONG_ID, mua.py.unb64(msg.strong_id()))

        index_text = self.term_gen.index_text
        index_text(frombits, 1, PREFIX_FROM)
        index_text(bodybits, 1, PREFIX_BODY)
        index_text(msg.subject() or u'', 1, PREFIX_SUBJECT)
        return self.db.add_document(doc)
コード例 #28
0
ファイル: indexer.py プロジェクト: mpmendespt/dre
    def convert(self, field_value):
        """
        Return the index value (used for sorting) for `field_value`.

        None is returned unchanged.  Otherwise the content type decides:
        numbers are sortable-serialised, booleans become 't' or 'f',
        datetimes become a YYYYmmddHHMMSS-style digit string and anything
        else is kept as is; the result is passed through smart_str().
        """
        if field_value is None:
            return None

        content_type = self._get_content_type(field_value)

        if self._is_float_or_interger(content_type):
            value = xapian.sortable_serialise(field_value)
        elif isinstance(content_type, (models.BooleanField, bool)):
            # Boolean fields are stored as 't' or 'f'
            value = 't' if field_value else 'f'
        elif isinstance(content_type, (models.DateTimeField, datetime.datetime)):
            # DateTime fields are stored as digits only (better sorting)
            parts = (field_value.year, field_value.month, field_value.day,
                     field_value.hour, field_value.minute, field_value.second)
            value = '%d%02d%02d%02d%02d%02d' % parts
        else:
            value = field_value

        return smart_str(value)
コード例 #29
0
ファイル: xapian_backend.py プロジェクト: rob-b/Grab
 def _marshal_value(self, value):
     """
     Private method that converts Python values to a string for Xapian values.

     Datetimes become a digit string (with microseconds appended when
     non-zero), dates get a zero time part, booleans become u't'/u'f',
     numbers are sortable-serialised and everything else goes through
     force_unicode().
     """
     if isinstance(value, datetime.datetime):
         stamp = u'%04d%02d%02d%02d%02d%02d' % (
             value.year, value.month, value.day,
             value.hour, value.minute, value.second
         )
         if value.microsecond:
             stamp += u'%06d' % value.microsecond
         return stamp
     if isinstance(value, datetime.date):
         return u'%04d%02d%02d000000' % (value.year, value.month, value.day)
     # bool is tested before the numeric types because it is itself an int.
     if isinstance(value, bool):
         return u't' if value else u'f'
     if isinstance(value, (int, long, float)):
         return xapian.sortable_serialise(value)
     return force_unicode(value)
コード例 #30
0
ファイル: utils.py プロジェクト: kennym/itools
def _encode_simple_value(field_cls, value):
    """Encode `value` for storage according to its field class.

    Field classes derived from ``Integer`` bypass their own encoder and are
    handed to ``sortable_serialise``; every other field class encodes the
    value with its own ``encode``.
    """
    if not issubclass(field_cls, Integer):
        # A common field or a new field
        return field_cls.encode(value)
    # Integer type is overloaded here.
    # XXX warning: this doesn't work with the big integers!
    return sortable_serialise(value)
コード例 #31
0
ファイル: index.py プロジェクト: tlevine/cbuh
def index(contacts, database, prefixes):
    """Index every contact into a Xapian database and record the prefixes seen.

    contacts  is passed to ``config()``, which yields ``(person, data)``
              pairs; ``data`` yields ``(prefix, content)`` pairs
    database  is the path handed to ``xapian.WritableDatabase`` (created or
              opened)
    prefixes  is the path of the file that receives the JSON list of all
              prefixes encountered

    A prefix whose first character is one of the first five entries of
    ``digits`` stores ``content`` as a sortable-serialised integer in that
    value slot; one of the remaining entries stores ``content`` unchanged in
    that slot; any other prefix is indexed as text, both under ``X`` + prefix
    and unprefixed.
    """
    entries = config(contacts)

    db = xapian.WritableDatabase(database, xapian.DB_CREATE_OR_OPEN)

    seen_prefixes = set()
    for person, data in entries:
        doc = xapian.Document()
        termgenerator.set_document(doc)

        termgenerator.index_text(person, 1, u'id')
        for prefix, content in data:
            lead = prefix[0]
            if lead in digits[:5]:
                serialised = xapian.sortable_serialise(int(content))
                doc.add_value(int(lead), serialised)
            elif lead in digits[5:]:
                doc.add_value(int(lead), content)
            else:
                termgenerator.index_text(content, 1, u'X' + prefix)
                termgenerator.index_text(content)
                # Leave a position gap between successive fields.
                termgenerator.increase_termpos()
            seen_prefixes.add(prefix)

        # The Q-prefixed term identifies the document, so re-indexing a
        # person replaces the earlier document instead of duplicating it.
        unique_term = u'Q' + person
        doc.add_boolean_term(unique_term)
        doc.set_data(person)
        db.replace_document(unique_term, doc)

    with open(prefixes, 'wb') as fp:
        json.dump(list(seen_prefixes), fp)
コード例 #32
0
def _marshal_value(value):
    """
    Private utility that prepares a Python value for a Xapian value slot.

    int and long values are encoded with ``xapian.sortable_serialise``; any
    other value is returned unchanged.
    """
    if not isinstance(value, (int, long)):
        return value
    return xapian.sortable_serialise(value)
コード例 #33
0
 def __call__(self, doc):
     """Return the sortable key for `doc`.

     The key is the serialised distance between the coordinates stored in
     value slot 4 (two comma-separated floats) and Washington, DC, so it
     represents how far the middle of this state is from Washington.
     """
     washington = (38.012, -77.037)
     raw_coords = doc.get_value(4)
     coords = map(float, raw_coords.split(","))
     distance = support.distance_between_coords(coords, washington)
     return xapian.sortable_serialise(distance)
コード例 #34
0
def _encode_simple_value(field_cls, value):
    """Return the stored encoding of `value` for the field class `field_cls`.

    ``Integer`` field classes (and their subclasses) are serialised with
    ``sortable_serialise``; all other field classes use their own ``encode``.
    """
    integer_field = issubclass(field_cls, Integer)
    if integer_field:
        # Overload the Integer type
        # XXX warning: this doesn't work with the big integers!
        encoded = sortable_serialise(value)
    else:
        # A common field or a new field
        encoded = field_cls.encode(value)
    return encoded
コード例 #35
0
ファイル: search_sorting3.py プロジェクト: EQ94/XapianInJD
 def __call__(self, doc):
     """Build the sort key for `doc`.

     Value slot 4 is read as two comma-separated floats; the key is the
     sortable-serialised distance from those coordinates (the middle of
     this state) to Washington, DC.
     """
     washington = (38.012, -77.037)
     parts = doc.get_value(4).split(',')
     coords = map(float, parts)
     distance = support.distance_between_coords(coords, washington)
     return xapian.sortable_serialise(distance)
コード例 #36
0
ファイル: sizes.py プロジェクト: haniokasai/netwalker-rootfs
    def index(self, document, pkg):
        """
        Update the document with the information from this data source.

        document  is the document to update
        pkg       is the python-apt Package object for this package

        The installed size and the package size are stored as
        sortable-serialised values in the slots ``self.val_inst_size`` and
        ``self.val_pkg_size``; a slot number of -1 disables that value.  If
        the sizes cannot be read from ``pkg`` the document is left untouched.
        """
        try:
            inst_size = pkg.installedSize
            pkg_size = pkg.packageSize
        except Exception:
            # Best effort: a package whose sizes cannot be read is skipped.
            # Only Exception is caught so that KeyboardInterrupt and
            # SystemExit still propagate.
            return

        if self.val_inst_size != -1:
            document.add_value(self.val_inst_size,
                               xapian.sortable_serialise(inst_size))
        if self.val_pkg_size != -1:
            document.add_value(self.val_pkg_size,
                               xapian.sortable_serialise(pkg_size))
コード例 #37
0
ファイル: update.py プロジェクト: cs2c/AppStream
def update(db, cache, datadir=None):
    """Refresh `db` from the app-install data and the apt lists.

    db       is the database to update
    cache    is passed through to both update helpers
    datadir  is the app-install desktop directory; a false value selects
             ``softwarecenter.paths.APP_INSTALL_DESKTOP_PATH``

    Afterwards the module-level ``popcon_max`` is stored, sortable-serialised
    as a float, under the ``popcon_max_desktop`` metadata key.
    """
    desktop_dir = datadir or softwarecenter.paths.APP_INSTALL_DESKTOP_PATH
    update_from_app_install_data(db, cache, desktop_dir)
    update_from_var_lib_apt_lists(db, cache)
    # add db global meta-data
    LOG.debug("adding popcon_max_desktop '%s'" % popcon_max)
    serialised_max = xapian.sortable_serialise(float(popcon_max))
    db.set_metadata("popcon_max_desktop", serialised_max)
コード例 #38
0
def encode_sortable_date(d):
    """Return the sortable serialisation of the date string `d`.

    `d` is parsed with the module-level ``ISO_8601`` format and converted to
    a local-time timestamp.  The timestamp is negated before serialising, so
    later dates produce smaller values.  A date that cannot be parsed or
    converted is encoded as 0.
    """
    try:
        parsed = time.strptime(d, ISO_8601)
        n = -int(time.mktime(parsed))
    except Exception:
        # Best effort: any parsing/conversion failure falls back to 0.
        # Only Exception is caught so that KeyboardInterrupt and SystemExit
        # are no longer swallowed.
        n = 0

    return xapian.sortable_serialise(n)
コード例 #39
0
def _marshal_value(value, prefunc=None):
    """
    Private utility method that converts Python values to a string for Xapian values.
    prefunc 对值做预处理
    """
    if value is None:
        return 0

    if prefunc:
        value = prefunc(value)
    if isinstance(value, (int, long, float)):
        value = xapian.sortable_serialise(value)
    elif isinstance(value, bool):
        value = 1 if value else 0
        value = xapian.sortable_serialise(value)
    value = str(value).lower()
    return value
コード例 #40
0
def _marshal_value(value, pre_func=None):
    """
    Private utility that serialises a numeric value for a Xapian value slot.

    value     is the value to store
    pre_func  is an optional callable applied to the value first

    Returns the ``xapian.sortable_serialise`` encoding of the (possibly
    pre-processed) value.
    """
    prepared = pre_func(value) if pre_func else value
    # value is assumed to be an int, long or float by default
    return xapian.sortable_serialise(prepared)
コード例 #41
0
def _marshal_value(value, pre_func=None):
    """
    Private utility method that converts a Python number to a Xapian value string.

    When `pre_func` is given it is called with the value and its result is
    what gets serialised.
    """
    if not pre_func:
        # value is assumed to be an int, long or float by default
        return xapian.sortable_serialise(value)
    return xapian.sortable_serialise(pre_func(value))
コード例 #42
0
def update(db, cache, datadir=None):
    """Update `db` from the app-install data and from the apt lists.

    db       is the database to update
    cache    is handed to both update helpers
    datadir  is the app-install desktop directory; when it is false,
             ``softwarecenter.paths.APP_INSTALL_DESKTOP_PATH`` is used

    Finally the module-level ``popcon_max`` is written, sortable-serialised
    as a float, to the ``popcon_max_desktop`` metadata key.
    """
    source_dir = datadir
    if not source_dir:
        source_dir = softwarecenter.paths.APP_INSTALL_DESKTOP_PATH
    update_from_app_install_data(db, cache, source_dir)
    update_from_var_lib_apt_lists(db, cache)
    # add db global meta-data
    LOG.debug("adding popcon_max_desktop %r", popcon_max)
    encoded_max = xapian.sortable_serialise(float(popcon_max))
    db.set_metadata("popcon_max_desktop", encoded_max)
コード例 #43
0
    def index(self, document, pkg):
        """
        Update the document with the information from this data source.

        document  is the document to update
        pkg       is the python-apt Package object for this package

        The time recorded for ``pkg.name`` in ``self._package_cataloged_time``
        is reused when present; otherwise ``self.now`` is recorded.  That
        time is stored, sortable-serialised, in the value slot ``self.value``.
        """
        catalog = self._package_cataloged_time
        name = pkg.name
        cataloged_at = catalog.get(name, self.now)
        # Remember the time so later runs see the same value for this package.
        catalog[name] = cataloged_at
        document.add_value(self.value,
                           xapian.sortable_serialise(cataloged_at))
コード例 #44
0
    def index(self, document, pkg):
        """
        Update the document with the information from this data source.

        document  is the document to update
        pkg       is the python-apt Package object for this package

        Stores the package's cataloged time (taken from
        ``self._package_cataloged_time``, defaulting to ``self.now`` and
        recorded there) as a sortable-serialised value in slot ``self.value``.
        """
        known_times = self._package_cataloged_time
        pkg_name = pkg.name
        first_seen = known_times.get(pkg_name, self.now)
        known_times[pkg_name] = first_seen
        serialised = xapian.sortable_serialise(first_seen)
        document.add_value(self.value, serialised)
コード例 #45
0
 def __call__(self, doc):
     """Return the sort key for `doc`.

     Value slot 4 is decoded as UTF-8 and must contain exactly two
     comma-separated floats.  The key is the sortable-serialised distance
     from those coordinates (the middle of this state) to Washington, DC.
     """
     washington = (38.012, -77.037)
     text = doc.get_value(4).decode('utf8')
     x, y = map(float, text.split(','))
     distance = support.distance_between_coords((x, y), washington)
     return xapian.sortable_serialise(distance)
コード例 #46
0
def _encode_simple_value(field_cls, value):
    """Encode `value` for storage according to the field class `field_cls`.

    ``Integer`` field classes are serialised with ``sortable_serialise``.
    A value whose type is exactly ``datetime`` is first converted with
    ``astimezone(fixed_offset(0))``; it and every other value are then
    encoded by the field class itself.
    """
    # Integers (FIXME this doesn't work with the big integers)
    if issubclass(field_cls, Integer):
        return sortable_serialise(value)

    # Datetimes: normalize to UTC, so searching works
    if type(value) is datetime:
        return field_cls.encode(value.astimezone(fixed_offset(0)))

    # A common field or a new field
    return field_cls.encode(value)
コード例 #47
0
ファイル: serialise.py プロジェクト: Kronuz/pyXapiand
def serialise_value(value):
    """
    Utility method that converts Python values to a string for Xapian values.

    Returns a list.  Every type produces a single-element list except
    ``LatLongCoord``, which produces its serialised form followed by the
    integers 5, 4, 3, 2, 1.

    * datetime -> ``YYYYMMDDHHMMSS`` (+ six microsecond digits if non-zero)
    * date     -> ``YYYYMMDD000000``
    * time     -> ``HHMMSS`` (+ six microsecond digits if non-zero)
    * bool     -> ``"t"`` / ``"f"``
    * float    -> ``sortable_serialise(value)``
    * int/long -> zero-padded to 12 digits
    * objects with a ``serialise`` method -> ``value.serialise()``
    * other true values -> ``normalize("%s" % value)``; false values -> ``""``

    """
    # datetime is tested before date because it is a subclass of date.
    if isinstance(value, datetime.datetime):
        text = "%04d%02d%02d%02d%02d%02d" % (
            value.year, value.month, value.day,
            value.hour, value.minute, value.second,
        )
        if value.microsecond:
            text += "%06d" % value.microsecond
        return [text]
    if isinstance(value, datetime.date):
        return ["%04d%02d%02d000000" % (value.year, value.month, value.day)]
    if isinstance(value, datetime.time):
        text = "%02d%02d%02d" % (value.hour, value.minute, value.second)
        if value.microsecond:
            text += "%06d" % value.microsecond
        return [text]
    # bool is tested before int because it is a subclass of int.
    if isinstance(value, bool):
        return ["t" if value else "f"]
    if isinstance(value, float):
        return [sortable_serialise(value)]
    if isinstance(value, (int, long)):
        return ["%012d" % value]
    if isinstance(value, LatLongCoord):
        serialised = value.serialise()
        # NOTE(review): the numbers 5..1 are appended here, not the
        # progressively truncated copies of the serialised string -- this
        # looks unintended, confirm against the callers before changing.
        return [serialised] + [5 - i for i in range(5)]
    if hasattr(value, "serialise"):
        return [value.serialise()]
    if value:
        return [normalize("%s" % value)]
    return [""]
コード例 #48
0
ファイル: documents.py プロジェクト: oschwand/xapers
 def _set_year(self, year):
     # FIXME: what to do if year is not an int?
     try:
         year = int(year)
     except ValueError:
         pass
     prefix = self.db._find_prefix('year')
     for term in self._term_iter(prefix):
         self._remove_term(prefix, year)
     self._add_term(prefix, year)
     facet = self.db._find_facet('year')
     self.xapian_doc.add_value(facet, xapian.sortable_serialise(year))
コード例 #49
0
def test_matchspy():
    """Test use of matchspies.

    Covers three things on the database built by setup_database(): a
    matchspy stays usable after the Python reference to it is dropped,
    clear_matchspies() detaches spies without raising, and a
    ValueCountMatchSpy on value slot 0 reports the expected per-value
    counts.

    """
    db = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, "was", "it")
    enq = xapian.Enquire(db)
    enq.set_query(query)

    def set_matchspy_deref(enq):
        """Set a matchspy, and then drop the reference, to check that it
        doesn't get deleted too soon.
        """
        spy = xapian.ValueCountMatchSpy(0)
        enq.add_matchspy(spy)
        del spy

    set_matchspy_deref(enq)
    # Running the match with the dereferenced spy still attached must work.
    mset = enq.get_mset(0, 10)
    expect(len(mset), 5)

    spy = xapian.ValueCountMatchSpy(0)
    enq.add_matchspy(spy)
    # Regression test for clear_matchspies() - used to always raise an
    # exception due to a copy and paste error in its definition.
    enq.clear_matchspies()
    mset = enq.get_mset(0, 10)
    # The spy was detached before the match ran, so it collected nothing.
    expect([item for item in list(spy.values())], [])

    # Re-attach the spy: this time it sees every matching document.
    enq.add_matchspy(spy)
    mset = enq.get_mset(0, 10)
    expect(spy.get_total(), 5)
    # values() is expected in value order: 1.5 before 2.
    expect([(item.term, item.termfreq) for item in list(spy.values())], [
        (xapian.sortable_serialise(1.5), 1),
        (xapian.sortable_serialise(2), 2),
    ])
    # top_values() is expected most frequent first: 2 before 1.5.
    expect([(item.term, item.termfreq) for item in spy.top_values(10)], [
        (xapian.sortable_serialise(2), 2),
        (xapian.sortable_serialise(1.5), 1),
    ])
コード例 #50
0
def setup_database():
    """Set up and return an inmemory database with 5 documents.

    The first document is built on its own.  Documents two to five are
    produced by repeatedly modifying and re-adding the same Document object,
    so each one also carries the terms, postings and values given to the
    ones before it (a value set again in the same slot replaces the earlier
    one).

    """
    db = xapian.inmemory_open()

    # Document 1: stands alone.
    doc = xapian.Document()
    doc.set_data("is it cold?")
    doc.add_term("is")
    doc.add_posting("it", 1)
    doc.add_posting("cold", 2)
    db.add_document(doc)

    # Document 2: a fresh Document object, reused for everything below.
    doc = xapian.Document()
    doc.set_data("was it warm?")
    doc.add_posting("was", 1)
    doc.add_posting("it", 2)
    doc.add_posting("warm", 3)
    db.add_document(doc)
    # Document 3: document 2 plus the term "two" and value slot 0 = 2.
    doc.set_data("was it warm? two")
    doc.add_term("two", 2)
    doc.add_value(0, xapian.sortable_serialise(2))
    db.add_document(doc)
    # Document 4: adds the term "three"; value slot 0 becomes 1.5.
    doc.set_data("was it warm? three")
    doc.add_term("three", 3)
    doc.add_value(0, xapian.sortable_serialise(1.5))
    db.add_document(doc)
    # Document 5: adds more terms, sets value slots 5 and 9, and puts value
    # slot 0 back to 2.
    doc.set_data("was it warm? four it")
    doc.add_term("four", 4)
    doc.add_term("it", 6)
    doc.add_posting("it", 7)
    doc.add_value(5, 'five')
    doc.add_value(9, 'nine')
    doc.add_value(0, xapian.sortable_serialise(2))
    db.add_document(doc)

    # Sanity check that all five add_document() calls took effect.
    expect(db.get_doccount(), 5)
    return db
コード例 #51
0
ファイル: fieldmap.py プロジェクト: anhnguyendepocen/flaxcode
        def mq(v):
            """Build the xapian.Query that matches the single value `v`.

            Uses ``prefix`` and ``valnum`` from the enclosing scope.

            * unicode is encoded to UTF-8 (unencodable characters dropped)
              and then handled as str;
            * str becomes a term query on prefix + value, with a ':' between
              them when the value starts with an upper-case character;
            * int and float become a value-range query whose two ends are
              both the serialised value;
            * datetime becomes the AND of a ``YYYYMMDD`` term query and a
              value-range query on the serialised local timestamp.

            Raises SearchError for any other type.
            """
            if isinstance(v, unicode):
                v = v.encode('utf-8', 'ignore')

            if isinstance(v, str):
                separator = ':' if v[0].isupper() else ''
                return xapian.Query('%s%s%s' % (prefix, separator, v))

            if isinstance(v, int) or isinstance(v, float):
                serialised = xapian.sortable_serialise(v)
                return xapian.Query(xapian.Query.OP_VALUE_RANGE, valnum,
                                    serialised, serialised)

            if isinstance(v, datetime):
                day_term = '%s%04d%02d%02d' % (prefix, v.year, v.month, v.day)
                stamp = xapian.sortable_serialise(time.mktime(v.timetuple()))
                day_query = xapian.Query(day_term)
                exact_query = xapian.Query(xapian.Query.OP_VALUE_RANGE,
                                           valnum, stamp, stamp)
                return xapian.Query(xapian.Query.OP_AND, day_query,
                                    exact_query)

            raise SearchError('unexpected type (%s) for value %s' % (
                type(v), v))
コード例 #52
0
class Indexer:
    """Indexes the contents of one .desktop entry for a given language."""

    def __init__(self, lang, val_popcon, progress=None):
        """
        lang        is the locale name to index for; None selects "en"
        val_popcon  is the value slot for the popcon rank, or -1 to skip it
        progress    is an optional reporter whose ``verbose`` method
                    receives parsing problems
        """
        self.val_popcon = val_popcon
        self.progress = progress
        self.lang = "en" if lang is None else lang
        # The part before the first "_" is the name given to the stemmer.
        self.xlang = self.lang.split("_")[0]
        self.xdglangs = Locale.expand_languages(self.lang)
        self.indexer = xapian.TermGenerator()
        # Get a stemmer for this language, if available
        try:
            self.stemmer = xapian.Stem(self.xlang)
            self.indexer.set_stemmer(self.stemmer)
        except xapian.InvalidArgumentError:
            pass

    def index(self, document, fname, entry):
        """
        Add the terms and values for one .desktop entry to `document`.

        document  is the document to update
        fname     is the name of the .desktop file
        entry     is the parsed desktop entry
        """
        # Index a single term "XD", marking that the package contains .desktop
        # files
        document.add_term("XD")

        # Index the name of the .desktop file, with prefix XDF
        document.add_term("XDF" + fname)

        # Index keywords retrieved in this indexer's language.  Locale.langs
        # is switched for the duration and always restored afterwards.
        self.indexer.set_document(document)
        saved_langs = Locale.langs
        try:
            Locale.langs = self.xdglangs
            for getter in (entry.getName, entry.getGenericName,
                           entry.getComment):
                self.indexer.index_text_without_positions(getter())
        finally:
            Locale.langs = saved_langs

        # Index .desktop categories, with prefix XDT
        for category in entry.getCategories():
            document.add_term("XDT" + category)

        # Add an "app-popcon" value with popcon rank; an unparsable rank is
        # reported (when a progress object is set) and stored as -1.
        try:
            popcon = int(entry.get("X-AppInstall-Popcon"))
        except ValueError as e:
            if self.progress:
                self.progress.verbose(
                    "%s: parsing X-AppInstall-Popcon: %s" % (fname, str(e)))
            popcon = -1
        if self.val_popcon != -1:
            document.add_value(self.val_popcon,
                               xapian.sortable_serialise(popcon))
コード例 #53
0
def test_value_iter():
    """Test iterators over list of values in a document.

    Document 5 of the setup_database() fixture is expected to expose exactly
    three values: slot 0 (serialised 2), slot 5 ('five') and slot 9 ('nine').

    """
    doc = setup_database().get_document(5)

    items = list(doc.values())
    expect(len(items), 3)
    expected = (
        (0, xapian.sortable_serialise(2)),
        (5, 'five'),
        (9, 'nine'),
    )
    for position, (num, value) in enumerate(expected):
        expect(items[position].num, num)
        expect(items[position].value, value)
コード例 #54
0
ファイル: serialise.py プロジェクト: ra2003/pyXapiand
def serialise_value(value):
    """
    Utility method that converts Python values to a string for Xapian values.

    The result is always a list.  It holds one element for every type except
    ``LatLongCoord``, whose list is its serialised form followed by the
    integers 5, 4, 3, 2, 1.

    """
    def _with_microseconds(base, moment):
        # Six microsecond digits are appended only when they are non-zero.
        if moment.microsecond:
            return base + '%06d' % moment.microsecond
        return base

    # datetime is checked ahead of date, and bool ahead of int, because
    # each is a subclass of the other.
    if isinstance(value, datetime.datetime):
        stamp = '%04d%02d%02d%02d%02d%02d' % (
            value.year, value.month, value.day, value.hour, value.minute,
            value.second)
        return [_with_microseconds(stamp, value)]
    if isinstance(value, datetime.date):
        return ['%04d%02d%02d000000' % (value.year, value.month, value.day)]
    if isinstance(value, datetime.time):
        stamp = '%02d%02d%02d' % (value.hour, value.minute, value.second)
        return [_with_microseconds(stamp, value)]
    if isinstance(value, bool):
        return ['t' if value else 'f']
    if isinstance(value, float):
        return [sortable_serialise(value)]
    if isinstance(value, (int, long)):
        return ['%012d' % value]
    if isinstance(value, LatLongCoord):
        # NOTE(review): the counters 5..1 end up in the result instead of
        # the shortened copies of the serialised coordinate -- this looks
        # unintended, confirm against the callers before changing.
        return [value.serialise()] + [5 - i for i in range(5)]
    if hasattr(value, 'serialise'):
        return [value.serialise()]
    if value:
        return [normalize("%s" % value)]
    return ['']
コード例 #55
0
def index(keyword_iter):
    """Index every record produced by `keyword_iter` into SEARCH_DB.

    keyword_iter  is a callable returning an iterable of
                  ``(id, cid, rank, kw)`` tuples, where ``kw`` yields
                  ``(word, value)`` pairs

    For each record the id goes to value slot 0, the sortable-serialised
    rank to slot 1 and the cid to slot 2.  Each usable word is added as a
    term with its paired value as the second ``add_term`` argument.  The
    document replaces any existing one carrying the same ``'>' + id`` term.
    The database is flushed once at the end.
    """
    for doc_id, cid, rank, keywords in keyword_iter():
        doc = xapian.Document()
        doc.add_value(0, doc_id)
        doc.add_value(1, xapian.sortable_serialise(rank))
        doc.add_value(2, cid)

        for word, weight in keywords:
            # Skip empty words, words that would collide with the '>' key
            # terms, and words of 254 characters or more.
            if not word or word.startswith('>') or len(word) >= 254:
                continue
            doc.add_term(word, weight)

        key = '>%s' % doc_id
        doc.add_term(key)
        SEARCH_DB.replace_document(key, doc)

    flush_db()
コード例 #56
0
ファイル: models.py プロジェクト: lamby/nm2
 def index(self, entries):
     """Index changelog entries into ``self.xdb``.

     entries  is an iterable of ``(tag, date, changedby, changelog)`` tuples

     Each entry becomes one document, identified by the term "XP" + tag and
     replacing any earlier document with that term.  The entry's timestamp
     (0 when the date cannot be parsed) goes to value slot 0, and the
     tokens of the changelog body and of `changedby` are added as postings.
     ``self.max_ts`` is raised to the largest timestamp seen.
     """
     for tag, date, changedby, changelog in entries:
         xid = "XP" + tag
         document = xapian.Document()
         document.set_data(changelog + "\n" + " -- " + changedby + "  " +
                           date)
         # Ignore timezones for our purposes: dealing with timezones in
         # python means dealing with one of the most demented pieces of code
         # people have ever conceived, or otherwise it means introducing
         # piles of external dependencies that maybe do the job. We can get
         # away without timezones, it is a lucky thing and we take advantage
         # of such strokes of luck.
         ts = 0
         mo = self.re_ts.match(date)
         parsed = email.utils.parsedate_tz(mo.group(1)) if mo else None
         if parsed is not None:
             # Only the first nine fields are used; the timezone offset in
             # the tenth is deliberately left out.
             ts = time.mktime(parsed[:9])
         document.add_value(0, xapian.sortable_serialise(ts))
         document.add_term(xid)

         # The first changelog line is skipped; changedby is indexed as if
         # it were one more line.
         lines = changelog.split("\n")[1:]
         lines.append(changedby)
         pos = 0
         for line in lines:
             for tok in self.tokenise(line):
                 tok = tok.strip(".-")
                 # Drop empty tokens, overlong tokens (see ircd
                 # (2.10.04+-1)) and purely numeric tokens.
                 if not tok or len(tok) > 100 or tok.isdigit():
                     continue
                 document.add_posting(tok, pos)
                 pos += 1
         self.xdb.replace_document(xid, document)
         if self.max_ts is None or ts > self.max_ts:
             self.max_ts = ts
コード例 #57
0
    def store(self, guid, properties, new, pre_cb=None, post_cb=None, *args):
        """Index one object, replacing any document stored for `guid`.

        guid        is the object identifier; it is also the value of the
                    property that lives in slot 0
        properties  maps property names to values
        new         is accepted but not used by this method
        pre_cb      is an optional callback invoked as
                    ``pre_cb(guid, properties, *args)`` before indexing
        post_cb     is an optional callback invoked the same way after the
                    document has been written

        The database is opened on first use, and ``_check_for_commit()`` is
        called at the end.
        """
        if self._db is None:
            self._do_open()

        if pre_cb is not None:
            pre_cb(guid, properties, *args)

        _logger.debug('Index %r object: %r', self.metadata.name, properties)

        document = xapian.Document()
        term_generator = xapian.TermGenerator()
        term_generator.set_document(document)

        for name, prop in self._props.items():
            raw = guid if prop.slot == 0 else properties[name]

            if prop.slot is not None:
                if prop.typecast in [int, float, bool]:
                    slot_value = xapian.sortable_serialise(raw)
                else:
                    if prop.localized:
                        # The localized text is also what gets turned into
                        # terms further down.
                        raw = env.gettext(raw, self._lang) or ''
                    slot_value = prop.to_string(raw)[0]
                document.add_value(prop.slot, slot_value)

            if not (prop.prefix or prop.full_text):
                continue
            for text in prop.to_string(raw):
                if prop.prefix:
                    term = _term(prop.prefix, text)
                    if prop.boolean:
                        document.add_boolean_term(term)
                    else:
                        document.add_term(term)
                if prop.full_text:
                    term_generator.index_text(text, 1, prop.prefix or '')
                # Leave a position gap between successive strings.
                term_generator.increase_termpos()

        self._db.replace_document(_term(env.GUID_PREFIX, guid), document)
        self._pending_updates += 1

        if post_cb is not None:
            post_cb(guid, properties, *args)

        self._check_for_commit()
コード例 #58
0
def _marshal_value(value):
    """
    Private utility method that converts Python values to a string for Xapian values.
    """
    if isinstance(value, datetime.datetime):
        value = _marshal_datetime(value)
    elif isinstance(value, datetime.date):
        value = _marshal_date(value)
    elif isinstance(value, bool):
        if value:
            value = u't'
        else:
            value = u'f'
    elif isinstance(value, float):
        value = xapian.sortable_serialise(value)
    elif isinstance(value, (int, long)):
        value = u'%012d' % value
    else:
        value = force_unicode(value).lower()
    return value