def update_document(url, doc): es_server = 'http://localhost:9200/' if os.environ.get('ELASTICSEARCH_SERVER'): es_server = os.environ['ELASTICSEARCH_SERVER'] es = ElasticSearch(es_server) try: es.update(index='memex', doc_type='page', id=url, script=doc) except: print "Unexpected error:", sys.exc_info()[0] pass
def update_document(url,doc): es_server = 'http://localhost:9200/' if os.environ.get('ELASTICSEARCH_SERVER'): es_server = os.environ['ELASTICSEARCH_SERVER'] es = ElasticSearch(es_server) try: es.update(index='memex', doc_type='page', id=url, script=doc) except: print "Unexpected error:", sys.exc_info()[0] pass
class Elastic(DataLayer):
    """ElasticSearch data layer for Eve."""

    # Deserializers applied to incoming values by field type.
    serializers = {
        'integer': int,
        'datetime': parse_date
    }

    def init_app(self, app):
        """Read connection settings from the app config and connect."""
        app.config.setdefault('ELASTICSEARCH_URL', 'http://localhost:9200/')
        app.config.setdefault('ELASTICSEARCH_INDEX', 'eve')
        self.es = ElasticSearch(app.config['ELASTICSEARCH_URL'])
        self.index = app.config['ELASTICSEARCH_INDEX']

    def _get_field_mapping(self, schema):
        """Get mapping for given field schema.

        Returns None for types with no explicit mapping.
        """
        if schema['type'] == 'datetime':
            return {'type': 'date'}
        elif schema['type'] == 'string' and schema.get('unique'):
            # Unique strings are matched exactly, so don't analyze them.
            return {'type': 'string', 'index': 'not_analyzed'}
        elif schema['type'] == 'string':
            return {'type': 'string'}

    def put_mapping(self, app):
        """Put mapping for elasticsearch for current schema.

        It's not called automatically now, but rather left for user to call
        it whenever it makes sense.
        """
        for resource, resource_config in app.config['DOMAIN'].items():
            properties = {}
            properties[config.DATE_CREATED] = self._get_field_mapping({'type': 'datetime'})
            properties[config.LAST_UPDATED] = self._get_field_mapping({'type': 'datetime'})
            for field, schema in resource_config['schema'].items():
                field_mapping = self._get_field_mapping(schema)
                if field_mapping:
                    properties[field] = field_mapping
            # TODO: config.SOURCES not available yet (self._datasource_ex(resource))
            datasource = (resource, )
            mapping = {datasource[0]: {'properties': properties}}
            self.es.put_mapping(self.index, datasource[0], mapping)

    def find(self, resource, req, sub_resource_lookup):
        """Run a search for *resource* driven by the request args.

        TODO: implement sub_resource_lookup
        """
        query = {
            'query': {
                'query_string': {
                    'query': request.args.get('q', '*'),
                    'default_field': request.args.get('df', '_all'),
                    'default_operator': 'AND'
                }
            }
        }

        if not req.sort and self._default_sort(resource):
            req.sort = self._default_sort(resource)

        # skip sorting when there is a query to use score
        if req.sort and 'q' not in request.args:
            query['sort'] = []
            sort = ast.literal_eval(req.sort)
            for (key, sortdir) in sort:
                sort_dict = dict([(key, 'asc' if sortdir > 0 else 'desc')])
                query['sort'].append(sort_dict)

        if req.where:
            where = json.loads(req.where)
            if where:
                query['filter'] = {
                    'term': where
                }

        if req.max_results:
            query['size'] = req.max_results

        if req.page > 1:
            query['from'] = (req.page - 1) * req.max_results

        source_config = config.SOURCES[resource]
        if 'facets' in source_config:
            query['facets'] = source_config['facets']

        try:
            args = self._es_args(resource)
            # BUG FIX: key was misspelled 'es_fiels', so the projection was
            # silently ignored; every other method here uses 'es_fields'.
            args['es_fields'] = self._fields(resource)
            return self._parse_hits(self.es.search(query, **args), resource)
        except es_exceptions.ElasticHttpError:
            # Malformed query etc. — return an empty cursor rather than raise.
            return ElasticCursor()

    def find_one(self, resource, **lookup):
        """Find a single document, by id or by a term lookup."""
        args = self._es_args(resource)
        args['es_fields'] = self._fields(resource)

        if config.ID_FIELD in lookup:
            try:
                hit = self.es.get(id=lookup[config.ID_FIELD], **args)
            except es_exceptions.ElasticHttpNotFoundError:
                return
            if not hit['exists']:
                return
            # Prefer the requested fields; fall back to the full source.
            doc = hit.get('fields', hit.get('_source', {}))
            doc['_id'] = hit.get('_id')
            convert_dates(doc, self._dates(resource))
            return doc
        else:
            query = {
                'query': {
                    'constant_score': {
                        'filter': {
                            'term': lookup
                        }
                    }
                }
            }
            try:
                args['size'] = 1
                docs = self._parse_hits(self.es.search(query, **args), resource)
                return docs.first()
            except es_exceptions.ElasticHttpNotFoundError:
                return None

    def find_list_of_ids(self, resource, ids, client_projection=None):
        """Fetch multiple documents by id in one multi-get call."""
        args = self._es_args(resource)
        args['es_fields'] = self._fields(resource)
        return self._parse_hits(self.es.multi_get(ids, **args), resource)

    def insert(self, resource, doc_or_docs, **kwargs):
        """Index the given document(s) and return their ids."""
        ids = []
        kwargs.update(self._es_args(resource))
        for doc in doc_or_docs:
            # es.index returns metadata (including the generated _id);
            # merge it back into the document.
            doc.update(self.es.index(doc=doc, id=doc.get('_id'), **kwargs))
            ids.append(doc['_id'])
        self.es.refresh(self.index)
        return ids

    def update(self, resource, id_, updates):
        """Apply a partial update to a single document."""
        args = self._es_args(resource, refresh=True)
        return self.es.update(id=id_, doc=updates, **args)

    def replace(self, resource, id_, document):
        """Replace a document wholesale."""
        args = self._es_args(resource, refresh=True)
        args['overwrite_existing'] = True
        return self.es.index(document=document, id=id_, **args)

    def remove(self, resource, id_=None):
        """Delete one document by id, or all documents for the resource."""
        args = self._es_args(resource, refresh=True)
        if id_:
            return self.es.delete(id=id_, **args)
        else:
            try:
                return self.es.delete_all(**args)
            except es_exceptions.ElasticHttpNotFoundError:
                # Nothing to delete — treat as success.
                return

    def _parse_hits(self, hits, resource):
        """Parse hits response into documents."""
        return ElasticCursor(hits, self._dates(resource))

    def _es_args(self, resource, refresh=None):
        """Get index and doctype args."""
        datasource = self._datasource(resource)
        args = {
            'index': self.index,
            'doc_type': datasource[0],
        }
        if refresh:
            args['refresh'] = refresh
        return args

    def _fields(self, resource):
        """Get projection fields for given resource as a comma-joined string."""
        datasource = self._datasource(resource)
        keys = datasource[2].keys()
        return ','.join(keys)

    def _default_sort(self, resource):
        """Return the datasource's default sort spec, if any."""
        datasource = self._datasource(resource)
        return datasource[3]

    def _dates(self, resource):
        """List all datetime fields for the resource (incl. meta fields)."""
        dates = [config.LAST_UPDATED, config.DATE_CREATED]
        datasource = self._datasource(resource)
        schema = config.DOMAIN[datasource[0]]['schema']
        for field, field_schema in schema.items():
            if field_schema['type'] == 'datetime':
                dates.append(field)
        return dates
def save(self, force_insert=False, force_update=False, **kwargs):
    """Persist the listing and mirror it into the 'glamazer' search index.

    Existing listings are updated in ES via a script; new listings are saved
    first (to obtain an id), follower notifications are created, and the
    document is then indexed.
    """
    es = ElasticSearch(ELASTIC_SEARCH_URL)
    if self.id:
        # Existing listing: scripted update of the Haystack-style document.
        location = self.get_location()
        # ES geo strings are "lat,lon" — note y (lat) comes first.
        location_es = "{0},{1}".format(location.y, location.x)
        es.update(
            'glamazer', 'modelresult', 'listings.listing.{0}'.format(self.id),
            script="ctx._source.listing_id = listing;" +
                   "ctx._source.artist_id = artist;" +
                   "ctx._source.artist_avatar = artist_avatar;" +
                   "ctx._source.title = title;" +
                   "ctx._source.location = location;" +
                   "ctx._source.description = description;" +
                   "ctx._source.get_picture = get_picture;" +
                   "ctx._source.metadata = metadata;" +
                   "ctx._source.price = price;" +
                   "ctx._source.likes = likes;" +
                   "ctx._source.comments = comments;" +
                   "ctx._source.tags = tags;" +
                   "ctx._source.status = status;" +
                   "ctx._source.style = style;" +
                   "ctx._source.rating = rating",
            params={
                'listing': self.id,
                'artist': self.get_artist_id(),
                'artist_avatar': self.get_artist_avatar(),
                'title': self.title,
                'location': location_es,
                'description': self.description,
                'get_picture': self.get_picture(),
                'metadata': self.metadata,
                'price': self.price,
                'likes': self.likes,
                'comments': self.comments,
                'tags': self.get_tags(),
                'status': self.status,
                'style': self.get_style(),
                'rating': self.get_rating()
            })
        # BUG FIX: **kwargs was accepted but silently dropped, breaking
        # e.g. save(using=...). Forward it to the parent.
        super(Listing, self).save(force_insert, force_update, **kwargs)
    else:
        # New listing: save first so self.id is assigned.
        super(Listing, self).save(force_insert, force_update, **kwargs)
        # Notify every follower of the artist about the new listing.
        artist_user = self.artist.user
        artist_name = artist_user.first_name
        followers = Followers.objects.select_related().filter(artist=self.artist)
        for follower in followers:
            Notification.objects.create(
                sender=artist_user,
                receiver=follower.user,
                time=current_time(),
                short_text=NOTIFICATIONS_SHORT[10].format(artist=artist_name),
                long_text=NOTIFICATIONS_LONG[10].format(artist=artist_name, listing=self.title, user_id=self.artist_id, metadata=self.id),
            )
        location = self.get_location()
        location_es = "{0},{1}".format(location.y, location.x)
        es.index('glamazer', 'modelresult', {
            'listing_id': self.id,
            'artist_id': self.artist_id,
            'artist_avatar': self.get_artist_avatar(),
            'title': self.title,
            'location': location_es,
            'description': self.description,
            'get_picture': self.get_picture(),
            'metadata': self.metadata,
            'price': self.price,
            'likes': self.likes,
            'comments': self.comments,
            'tags': self.get_tags(),
            'status': self.status,
            'style': self.get_style(),
            'rating': self.get_rating()
        }, id='listings.listing.{0}'.format(self.id))
        # Make the new document immediately searchable.
        es.refresh('glamazer')
# NOTE(review): fragment — the `try:` matching this first `except` clause,
# and the definitions of `jq`, `es` and `mid`, lie outside this view.
except ValueError:
    # Unparsable Runtime — drop the field rather than index bad data.
    jq.pop("Runtime", None)

# remove comma from imbdbVotes and make into an int
if "imdbVotes" in jq:
    votes = re.sub(",", "", jq["imdbVotes"])
    try:
        jq["imdbVotes"] = int(votes)
    except ValueError:
        jq.pop("imdbVotes", None)

# parse Metascore to float
if "Metascore" in jq:
    try:
        jq["Metascore"] = float(jq["Metascore"])
    except ValueError:
        jq.pop("Metascore", None)

# parse imdbRating to float
if "imdbRating" in jq:
    try:
        jq["imdbRating"] = float(jq["imdbRating"])
    except ValueError:
        jq.pop("imdbRating", None)

# Build the partial document: a marker flag plus every cleaned OMDb field.
metadata = { "metadata" : "true" }
for key, value in jq.iteritems():  # Python 2 dict iteration
    #print "key=%s value=%s" % (key, value)
    metadata[key] = jq[key]

# update the ES document and echo the server response
#print "update=%s" % metadata
print es.update("prime", "video", mid, doc=metadata)
def save(self, force_insert=False, force_update=False, **kwargs):
    """Persist the listing and mirror it into the 'glamazer' search index.

    Existing listings are updated in ES via a script; new listings are saved
    first (to obtain an id) and then indexed as a fresh document.
    """
    es = ElasticSearch(ELASTIC_SEARCH_URL)
    if self.id:
        # Existing listing: scripted update of the Haystack-style document.
        location = self.get_location()
        # ES geo strings are "lat,lon" — note y (lat) comes first.
        location_es = "{0},{1}".format(location.y, location.x)
        es.update(
            'glamazer', 'modelresult', 'listings.listing.{0}'.format(self.id),
            script="ctx._source.listing_id = listing;" +
                   "ctx._source.artist_id = artist;" +
                   "ctx._source.artist_avatar = artist_avatar;" +
                   "ctx._source.artist_name = artist_name;" +
                   "ctx._source.salon_id = salon;" +
                   "ctx._source.salon_avatar = salon_avatar;" +
                   "ctx._source.salon_name = salon_name;" +
                   "ctx._source.title = title;" +
                   "ctx._source.location = location;" +
                   "ctx._source.description = description;" +
                   "ctx._source.get_picture = get_picture;" +
                   "ctx._source.metadata = metadata;" +
                   "ctx._source.gender = gender;" +
                   "ctx._source.price = price;" +
                   "ctx._source.currency = currency;" +
                   "ctx._source.likes = likes;" +
                   "ctx._source.comments = comments;" +
                   "ctx._source.tags = tags;" +
                   "ctx._source.status = status;" +
                   "ctx._source.style = style;" +
                   "ctx._source.rating = rating",
            params={
                'listing': self.id,
                'artist': self.get_artist_id(),
                'artist_avatar': self.get_artist_avatar(),
                'artist_name': self.get_artist_name(),
                'salon': self.get_salon_id(),
                'salon_avatar': self.get_salon_avatar(),
                'salon_name': self.get_salon_name(),
                'title': self.title,
                'location': location_es,
                'description': self.description,
                'get_picture': self.get_picture(),
                'metadata': self.metadata,
                'gender': self.gender,
                'price': self.price,
                'currency': self.currency,
                'likes': self.likes,
                'comments': self.comments,
                'tags': self.get_tags(),
                'status': self.status,
                'style': self.get_style(),
                'rating': self.get_rating()
            })
        # BUG FIX: **kwargs was accepted but silently dropped, breaking
        # e.g. save(using=...). Forward it to the parent.
        super(Listing, self).save(force_insert, force_update, **kwargs)
    else:
        # New listing: save first so self.id is assigned.
        super(Listing, self).save(force_insert, force_update, **kwargs)
        location = self.get_location()
        location_es = "{0},{1}".format(location.y, location.x)
        es.index('glamazer', 'modelresult', {
            'listing_id': self.id,
            'artist_id': self.artist_id,
            'artist_avatar': self.get_artist_avatar(),
            'artist_name': self.get_artist_name(),
            'salon_id': self.get_salon_id(),
            'salon_avatar': self.get_salon_avatar(),
            'salon_name': self.get_salon_name(),
            'title': self.title,
            'location': location_es,
            'description': self.description,
            'get_picture': self.get_picture(),
            'metadata': self.metadata,
            'gender': self.gender,
            'price': self.price,
            'currency': self.currency,
            'likes': self.likes,
            'comments': self.comments,
            'tags': self.get_tags(),
            'status': self.status,
            'style': self.get_style(),
            'rating': self.get_rating()
        }, id='listings.listing.{0}'.format(self.id))
        # Make the new document immediately searchable.
        es.refresh('glamazer')
# NOTE(review): fragment — the opening of the dict literal closed by these
# braces, and the definitions of `es`, `index` and `paginationSize`, lie
# outside this view.
        }
    }
}
#print query
# Fetch up to 10000 documents matching the (off-screen) query.
result = es.search(query,size=10000,index=index)
for r in result['hits']['hits']:
    #print r['_source']['pub_content']
    # Look for any OTHER document with the exact same article body.
    query2 = {'query':{
        "bool":{
            "must":[
                {"match_phrase" : {"art_content":r['_source']['art_content']}}
            ],
            "must_not":{
                "match" : {"_id":r['_id']}
            }
        }
    }
    }
    partialResult = es.search(query2,size=paginationSize,index=index)
    print partialResult['hits']['total']
    # Flag the document: art_repeated = 1 if a duplicate exists, else 0.
    if partialResult['hits']['total'] > 0:
        doc = {}
        doc['art_repeated'] = 1
        # NOTE(review): doc_type argument equals the index name here —
        # confirm that is intentional.
        es.update(index,index,r['_id'],doc=doc)
    else:
        doc = {}
        doc['art_repeated'] = 0
        es.update(index,index,r['_id'],doc=doc)
    #for pr in partialResult['hits']['hits']:
    #print pr['_source']['art_content']
def save(self, force_insert=False, force_update=False, **kwargs):
    """Persist the listing and mirror it into the 'glamazer' search index.

    Existing listings are updated in ES via a script; new listings are saved
    first (to obtain an id) and then indexed as a fresh document.
    """
    es = ElasticSearch(ELASTIC_SEARCH_URL)
    if self.id:
        # Existing listing: scripted update of the Haystack-style document.
        location = self.get_location()
        # ES geo strings are "lat,lon" — note y (lat) comes first.
        location_es = "{0},{1}".format(location.y, location.x)
        es.update(
            'glamazer', 'modelresult', 'listings.listing.{0}'.format(self.id),
            script="ctx._source.listing_id = listing;" +
                   "ctx._source.artist_id = artist;" +
                   "ctx._source.artist_avatar = artist_avatar;" +
                   "ctx._source.artist_name = artist_name;" +
                   "ctx._source.salon_id = salon;" +
                   "ctx._source.salon_avatar = salon_avatar;" +
                   "ctx._source.salon_name = salon_name;" +
                   "ctx._source.title = title;" +
                   "ctx._source.location = location;" +
                   "ctx._source.description = description;" +
                   "ctx._source.get_picture = get_picture;" +
                   "ctx._source.metadata = metadata;" +
                   "ctx._source.gender = gender;" +
                   "ctx._source.price = price;" +
                   "ctx._source.currency = currency;" +
                   "ctx._source.likes = likes;" +
                   "ctx._source.comments = comments;" +
                   "ctx._source.tags = tags;" +
                   "ctx._source.status = status;" +
                   "ctx._source.style = style;" +
                   "ctx._source.rating = rating",
            params={
                'listing': self.id,
                'artist': self.get_artist_id(),
                'artist_avatar': self.get_artist_avatar(),
                'artist_name': self.get_artist_name(),
                'salon': self.get_salon_id(),
                'salon_avatar': self.get_salon_avatar(),
                'salon_name': self.get_salon_name(),
                'title': self.title,
                'location': location_es,
                'description': self.description,
                'get_picture': self.get_picture(),
                'metadata': self.metadata,
                'gender': self.gender,
                'price': self.price,
                'currency': self.currency,
                'likes': self.likes,
                'comments': self.comments,
                'tags': self.get_tags(),
                'status': self.status,
                'style': self.get_style(),
                'rating': self.get_rating()
            })
        # BUG FIX: **kwargs was accepted but silently dropped, breaking
        # e.g. save(using=...). Forward it to the parent.
        super(Listing, self).save(force_insert, force_update, **kwargs)
    else:
        # New listing: save first so self.id is assigned.
        super(Listing, self).save(force_insert, force_update, **kwargs)
        location = self.get_location()
        location_es = "{0},{1}".format(location.y, location.x)
        es.index('glamazer', 'modelresult', {
            'listing_id': self.id,
            'artist_id': self.artist_id,
            'artist_avatar': self.get_artist_avatar(),
            'artist_name': self.get_artist_name(),
            'salon_id': self.get_salon_id(),
            'salon_avatar': self.get_salon_avatar(),
            'salon_name': self.get_salon_name(),
            'title': self.title,
            'location': location_es,
            'description': self.description,
            'get_picture': self.get_picture(),
            'metadata': self.metadata,
            'gender': self.gender,
            'price': self.price,
            'currency': self.currency,
            'likes': self.likes,
            'comments': self.comments,
            'tags': self.get_tags(),
            'status': self.status,
            'style': self.get_style(),
            'rating': self.get_rating()
        }, id='listings.listing.{0}'.format(self.id))
        # Make the new document immediately searchable.
        es.refresh('glamazer')
class ElasticSearchDataStore(datastore.DataStore):
    """Implements the datastore API on top of Elasticsearch."""

    def __init__(self, index_list):
        # Connect to the Elasticsearch server.
        self.client = ElasticSearch('http://%s:%s/' % (ELASTICSEARCH_SERVER_IP, ELASTICSEARCH_PORT))
        # TODO Refactor this to not need the index list at this stage.
        self.index_list = index_list

    def search(self, sketch, query, filters):
        """Search ElasticSearch.

        This will take a query string from the UI together with a filter
        definition. Based on this it will send the search request to
        elasticsearch and get result back.

        Args:
            sketch -- string, sketch ID
            query -- string, query string
            filters -- dict, Dictionary containing filters to apply

        Returns:
            Set of event documents in JSON format
        """
        # Three mutually exclusive query shapes, all sorted by datetime asc:
        # a time-range filter, a "starred events" filter, or a plain query.
        if filters.get("time_start", None):
            # Restrict the query-string search to [time_start, time_end].
            query = {
                "query": {
                    "query_string": {
                        "query": query
                    }
                },
                "filter": {
                    "range": {
                        "datetime": {
                            "gte": filters['time_start'],
                            "lte": filters['time_end']
                        }
                    }
                },
                "sort": {
                    "datetime": "asc"
                }
            }
        elif filters.get("star", None):
            # Match all events carrying the "__ts_star" label for this sketch.
            # timesketch_label is a nested document (see add_label_to_event).
            query = {
                "query": {
                    "match_all": {}
                },
                "filter": {
                    "nested": {
                        "path": "timesketch_label",
                        "filter": {
                            "bool": {
                                "must": [
                                    {
                                        "term": {
                                            "timesketch_label.name": "__ts_star"
                                        }
                                    },
                                    {
                                        "term": {
                                            "timesketch_label.sketch": str(sketch)
                                        }
                                    }
                                ]
                            }
                        }
                    }
                },
                "sort": {
                    "datetime": "asc"
                }
            }
        else:
            # Plain query-string search.
            query = {
                "query": {
                    "query_string": {
                        "query": query
                    }
                },
                "sort": {
                    "datetime": "asc"
                }
            }
        # Results are capped at 500 events.
        return self.client.search(query, index=self.index_list, doc_type="plaso_event", size=500)

    def get_single_event(self, event_id):
        """Get a single event document from elasticsearch.

        Args:
            event_id -- string, event ID

        Returns:
            Event document as JSON
        """
        # NOTE(review): only the first index in index_list is consulted —
        # confirm events are guaranteed to live there.
        return self.client.get(index=self.index_list[0], doc_type="plaso_event",id=event_id)

    def add_label_to_event(self, event, sketch, user, label, toggle=False):
        """Add label to an event document in ElasticSearch.

        Args:
            event -- string, event ID
            sketch -- string, sketch ID
            user -- string, user ID
            label -- string, the label to apply
            toggle -- Bool, Toggle label or create a new one

        Returns:
            HTTP status code

        In order for this to work, we need to add a mapping for this nested
        document. This needs to be done when the index is first created.
        mapping = {
            "plaso_event": {
                "properties": {
                    "timesketch_label": {
                        "type": "nested"
                    }
                }
            }
        }
        """
        doc = self.client.get(self.index_list, "plaso_event", event)
        try:
            # Probe for an existing timesketch_label field on the event.
            doc['_source']['timesketch_label']
        except KeyError:
            # Field missing — initialise it to an empty list first.
            doc = {"timesketch_label": []}
            self.client.update(self.index_list, "plaso_event", event, doc=doc)

        if toggle:
            # Remove the label if present, otherwise append it.
            script_string = "if(ctx._source.timesketch_label.contains"\
                            "(timesketch_label)) {ctx._source.timesketch_label"\
                            ".remove(timesketch_label)} else {ctx._source."\
                            "timesketch_label += timesketch_label}"
        else:
            # Append the label only if it is not already present.
            script_string = "if( ! ctx._source.timesketch_label.contains"\
                            "(timesketch_label)) {ctx._source.timesketch_label"\
                            "+= timesketch_label}"
        script = {
            "script": script_string,
            "params": {
                "timesketch_label": {
                    "name": label, "user": user, "sketch": sketch
                }
            }
        }
        self.client.update(self.index_list, "plaso_event", event, script)
# NOTE(review): fragment — `jq`, `runtime`, `es` and `mid` are defined
# outside this view.
try:
    jq["Runtime"] = int(runtime)
except ValueError:
    # Unparsable Runtime — drop the field rather than index bad data.
    jq.pop("Runtime", None)

# remove comma from imbdbVotes and make into an int
if "imdbVotes" in jq:
    votes = re.sub(",", "", jq["imdbVotes"])
    try:
        jq["imdbVotes"] = int(votes)
    except ValueError:
        jq.pop("imdbVotes", None)

# parse Metascore to float
if "Metascore" in jq:
    try:
        jq["Metascore"] = float(jq["Metascore"])
    except ValueError:
        jq.pop("Metascore", None)

# parse imdbRating to float
if "imdbRating" in jq:
    try:
        jq["imdbRating"] = float(jq["imdbRating"])
    except ValueError:
        jq.pop("imdbRating", None)

# Build the partial document: a marker flag plus every cleaned OMDb field.
metadata = {"metadata": "true"}
for key, value in jq.iteritems():  # Python 2 dict iteration
    #print "key=%s value=%s" % (key, value)
    metadata[key] = jq[key]

# update the ES document and echo the server response
#print "update=%s" % metadata
print es.update("prime", "video", mid, doc=metadata)
class ElasticConnector(Connector):
    """Class for connectors that operate with an elasticsearch database."""

    # Maximum number of hits fetched per search request.
    MAX_SIZE = 1000

    def __init__(self, database, host='http://localhost:9200/'):
        self.client = ElasticSearch(host)
        self.index = database
        self.create_index()

    def query_to_id(self, query):
        """Return an id representation of the specified query.

        This is a temporary method used as a replacement for an
        elasticsearch query search.
        """
        return "_".join(str(k) + "_" + str(v) for k, v in query.items()).replace("/", "_")

    def create_index(self):
        """Create the index, ignoring failures (e.g. it already exists)."""
        try:
            self.client.create_index(self.index)
        except Exception:
            # Best-effort: the index most likely already exists.
            pass

    def set_dynamic_mapping(self, collection):
        """Set dynamic mapping for the specified document type."""
        self.client.put_mapping(self.index, collection, {'dynamic': True})

    def save_block(self, block):
        """Save operation info for *block* in the database."""
        super().save_block(block)
        collection = block.get_collection()
        query = block.get_query()
        # (removed an unused `block.to_dict()` call — the body is rebuilt
        # inside update_by_query anyway)
        self.update_by_query(collection, query, block)

    def update_by_query(self, collection, query, document):
        """Index *document* under an id derived from *query*.

        Sets dynamic mapping for the collection first; errors are printed
        and swallowed (best-effort persistence).
        """
        try:
            self.set_dynamic_mapping(collection)
            document_body = document.to_dict()
            # Elasticsearch rejects a literal `_id` field inside the body.
            if "_id" in document_body:
                del document_body['_id']
            self.client.index(self.index, collection, document_body,
                              id=self.query_to_id(query))
        except Exception as e:
            # Best-effort: report and continue.
            print(e)

    def find_last_block(self):
        """Return the last block index stored in the status collection.

        Returns 0 when the status document does not exist yet.
        """
        try:
            document = self.client.get(self.index, 'status', 'height_all_tsx')['_source']
            return document['value']
        except ElasticHttpNotFoundError:
            return 0

    def update_last_block(self, last_block):
        """Store *last_block* as the value of the status document."""
        self.client.index(self.index, 'status', {'value': last_block}, id='height_all_tsx')

    def save_instance(self, instance):
        """Save an account or comment object."""
        self.update_by_query(instance.get_collection(), instance.get_query(), instance)

    def get_instances_to_update(self, collection):
        """Return all documents flagged `need_update`, with their ids in `_id`."""
        hits = self.client.search("need_update:true", index=self.index,
                                  doc_type=collection, size=self.MAX_SIZE)['hits']['hits']
        return [{**hit['_source'], **{"_id": hit["_id"]}} for hit in hits]

    def update_instances(self, collection, instances):
        """Reset the need_update flag for every instance (keyed by `_id`)."""
        for instance in instances:
            self.client.update(self.index, collection, instance["_id"], doc={'need_update': False})