class SampleMaker(object): def __init__(self, name): log = open(name, "wb") self.log = log self.conn = ES(("http", "127.0.0.1", 9200), timeout=300.0, log_curl=True, dump_curl=log) self.index_name = "test-index" self.document_type = "test-type" self.conn.delete_index_if_exists(self.index_name) self.init_default_index() def init_default_index(self): from pyes.helpers import SettingsBuilder settings = SettingsBuilder() from pyes.mappings import DocumentObjectField from pyes.mappings import IntegerField from pyes.mappings import NestedObject from pyes.mappings import StringField, DateField, BooleanField, GeoPointField, FloatField docmapping = DocumentObjectField(name=self.document_type) docmapping.add_property( StringField(name="description", store=True, term_vector="with_positions_offsets", index="analyzed")) docmapping.add_property( StringField(name="name", store=True, term_vector="with_positions_offsets", index="analyzed")) docmapping.add_property(StringField(name="tag", store=True, index="not_analyzed")) docmapping.add_property(IntegerField(name="age", store=True)) docmapping.add_property(FloatField(name="price")) docmapping.add_property(DateField(name="date", store=True)) docmapping.add_property(BooleanField(name="in_stock", store=True, index="not_analyzed")) docmapping.add_property(GeoPointField(name="position")) nested_object = NestedObject(name="metadata") nested_object.add_property(StringField(name="name", store=True)) nested_object.add_property(StringField(name="value", store=True)) nested_object.add_property(IntegerField(name="num", store=True)) docmapping.add_property(nested_object) settings.add_mapping(docmapping) self.conn.ensure_index(self.index_name, settings) def generate_datafile(self, number_items=1000): """ Generate a dataset with number_items elements. """ names = get_names() totalnames = len(names) #init random seeder random.seed() #calculate items # names = random.sample(names, number_items) for i in xrange(number_items): data = {"name": names[random.randint(0, totalnames - 1)], "age": random.randint(1, 100), "price": random.random()*100.0, "tag":[words(1, False) for r in xrange(random.randint(1, 5))], "in_stock": random.choice([True, False]), "date": datetime.now()+timedelta(days=random.choice([1, -1])*random.randint(0,1000)), "position": { "lat" : random.choice([1, -1])* random.random()*90.0, "lon" : random.choice([1, -1])* random.random()*180.0 }, "description": words(random.randint(1, 100), False), "metadata":[{"name":names[random.randint(0, totalnames - 1)], "value":str(random.randint(1, 5)), "num":random.randint(1, 50) } for r in xrange(random.randint(1, 5))] } self.conn.index(data, self.index_name, self.document_type, id=str(i+1)) def close(self): self.conn.flush(self.index_name) self.log.close()
class SampleMaker(object): def __init__(self, name): log = open(name, "wb") self.log = log self.conn = ES(("http", "127.0.0.1", 9200), timeout=300.0, log_curl=True, dump_curl=log) self.index_name = "test-index" self.document_type = "test-type" self.conn.delete_index_if_exists(self.index_name) self.init_default_index() def init_default_index(self): from pyes.helpers import SettingsBuilder settings = SettingsBuilder() from pyes.mappings import DocumentObjectField from pyes.mappings import IntegerField from pyes.mappings import NestedObject from pyes.mappings import StringField, DateField, BooleanField, GeoPointField, FloatField docmapping = DocumentObjectField(name=self.document_type) docmapping.add_property( StringField(name="description", store=True, term_vector="with_positions_offsets", index="analyzed")) docmapping.add_property( StringField(name="name", store=True, term_vector="with_positions_offsets", index="analyzed")) docmapping.add_property( StringField(name="tag", store=True, index="not_analyzed")) docmapping.add_property(IntegerField(name="age", store=True)) docmapping.add_property(FloatField(name="price")) docmapping.add_property(DateField(name="date", store=True)) docmapping.add_property( BooleanField(name="in_stock", store=True, index="not_analyzed")) docmapping.add_property(GeoPointField(name="position")) nested_object = NestedObject(name="metadata") nested_object.add_property(StringField(name="name", store=True)) nested_object.add_property(StringField(name="value", store=True)) nested_object.add_property(IntegerField(name="num", store=True)) docmapping.add_property(nested_object) settings.add_mapping(docmapping) self.conn.ensure_index(self.index_name, settings) def generate_datafile(self, number_items=1000): """ Generate a dataset with number_items elements. """ names = get_names() totalnames = len(names) #init random seeder random.seed() #calculate items # names = random.sample(names, number_items) for i in xrange(number_items): data = { "name": names[random.randint(0, totalnames - 1)], "age": random.randint(1, 100), "price": random.random() * 100.0, "tag": [words(1, False) for r in xrange(random.randint(1, 5))], "in_stock": random.choice([True, False]), "date": datetime.now() + timedelta(days=random.choice([1, -1]) * random.randint(0, 1000)), "position": { "lat": random.choice([1, -1]) * random.random() * 90.0, "lon": random.choice([1, -1]) * random.random() * 180.0 }, "description": words(random.randint(1, 100), False), "metadata": [{ "name": names[random.randint(0, totalnames - 1)], "value": str(random.randint(1, 5)), "num": random.randint(1, 50) } for r in xrange(random.randint(1, 5))] } self.conn.index(data, self.index_name, self.document_type, id=str(i + 1)) def close(self): self.conn.flush(self.index_name) self.log.close()