Example #1
0
def update_document(url, doc):
    """Run ``doc`` as an update script against the page stored under ``url``.

    The server address comes from the ``ELASTICSEARCH_SERVER`` environment
    variable when it holds a non-empty value; otherwise
    ``http://localhost:9200/`` is used.  The update targets index ``memex``
    and document type ``page``, with ``url`` as the document id.

    The call is best-effort: any ``Exception`` raised by the update is
    reported on stdout and swallowed.  ``KeyboardInterrupt`` and
    ``SystemExit`` are not caught, so the process can still be interrupted.
    """
    es_server = os.environ.get('ELASTICSEARCH_SERVER') or 'http://localhost:9200/'
    es = ElasticSearch(es_server)

    try:
        es.update(index='memex', doc_type='page', id=url, script=doc)
    except Exception:
        # Single-argument print(...) parses under both Python 2 and Python 3.
        print("Unexpected error: %s" % (sys.exc_info()[0],))
Example #2
0
def update_document(url, doc):
    """Send ``doc`` as an update script for the ``page`` document ``url``.

    Connects to the server named by the ``ELASTICSEARCH_SERVER`` environment
    variable, falling back to ``http://localhost:9200/`` when the variable is
    unset or empty, and updates the document in the ``memex`` index.

    Errors are reported, not raised: an ``Exception`` from the update call is
    printed to stdout and the function returns normally.  Only ``Exception``
    subclasses are intercepted, so ``KeyboardInterrupt`` and ``SystemExit``
    still propagate.
    """
    default_server = 'http://localhost:9200/'
    es = ElasticSearch(os.environ.get('ELASTICSEARCH_SERVER') or default_server)

    try:
        es.update(index='memex',
                  doc_type='page',
                  id=url,
                  script=doc)
    except Exception:
        # Written as a one-argument call so it is valid on Python 2 and 3.
        print("Unexpected error: %s" % (sys.exc_info()[0],))
Example #3
0
class Elastic(DataLayer):
    """ElasticSearch data layer."""

    # Converters keyed by schema type name.
    serializers = {
        'integer': int,
        'datetime': parse_date
    }

    def init_app(self, app):
        """Create the ElasticSearch client from ``app.config``.

        ``ELASTICSEARCH_URL`` and ``ELASTICSEARCH_INDEX`` default to
        ``http://localhost:9200/`` and ``eve`` when the app has not set them.
        """
        app.config.setdefault('ELASTICSEARCH_URL', 'http://localhost:9200/')
        app.config.setdefault('ELASTICSEARCH_INDEX', 'eve')
        self.es = ElasticSearch(app.config['ELASTICSEARCH_URL'])
        self.index = app.config['ELASTICSEARCH_INDEX']

    def _get_field_mapping(self, schema):
        """Get mapping for given field schema.

        Returns ``None`` for schema types that get no explicit mapping.
        """
        field_type = schema['type']
        if field_type == 'datetime':
            return {'type': 'date'}
        if field_type == 'string':
            if schema.get('unique'):
                # Unique strings are stored verbatim (not analyzed).
                return {'type': 'string', 'index': 'not_analyzed'}
            return {'type': 'string'}
        return None

    def put_mapping(self, app):
        """Put mapping for elasticsearch for current schema.

        It's not called automatically now, but rather left for user to call
        it whenever it makes sense.
        """
        for resource, resource_config in app.config['DOMAIN'].items():
            date_mapping = {'type': 'datetime'}
            properties = {
                config.DATE_CREATED: self._get_field_mapping(date_mapping),
                config.LAST_UPDATED: self._get_field_mapping(date_mapping),
            }

            for field, schema in resource_config['schema'].items():
                field_mapping = self._get_field_mapping(schema)
                if field_mapping:
                    properties[field] = field_mapping

            # TODO: config.SOURCES not available yet
            # (self._datasource_ex(resource))
            datasource = (resource, )
            mapping = {datasource[0]: {'properties': properties}}
            self.es.put_mapping(self.index, datasource[0], mapping)

    def find(self, resource, req, sub_resource_lookup):
        """Search ``resource`` and return the hits as a cursor.

        The query string and default field are read from the current
        request's ``q`` and ``df`` arguments.  Sorting, term filter, paging
        and facets are taken from ``req`` and the resource's source config.
        An ``ElasticHttpError`` from the search yields an empty cursor.

        TODO: implement sub_resource_lookup
        """
        query = {
            'query': {
                'query_string': {
                    'query': request.args.get('q', '*'),
                    'default_field': request.args.get('df', '_all'),
                    'default_operator': 'AND'
                }
            }
        }

        if not req.sort and self._default_sort(resource):
            req.sort = self._default_sort(resource)

        # skip sorting when there is a query to use score
        if req.sort and 'q' not in request.args:
            query['sort'] = [
                {key: 'asc' if sortdir > 0 else 'desc'}
                for (key, sortdir) in ast.literal_eval(req.sort)
            ]

        if req.where:
            where = json.loads(req.where)
            if where:
                query['filter'] = {
                    'term': where
                }

        if req.max_results:
            query['size'] = req.max_results

        if req.page > 1:
            query['from'] = (req.page - 1) * req.max_results

        source_config = config.SOURCES[resource]
        if 'facets' in source_config:
            query['facets'] = source_config['facets']

        try:
            args = self._es_args(resource)
            # Key must be ``es_fields`` (as in find_one / find_list_of_ids);
            # a misspelled key would not apply the projection.
            args['es_fields'] = self._fields(resource)
            return self._parse_hits(self.es.search(query, **args), resource)
        except es_exceptions.ElasticHttpError:
            return ElasticCursor()

    def find_one(self, resource, **lookup):
        """Return a single document matching ``lookup`` or ``None``.

        When ``lookup`` contains the id field the document is fetched by id;
        otherwise a constant-score term filter on ``lookup`` is searched and
        the first hit returned.  Not-found errors result in ``None``.
        """
        args = self._es_args(resource)
        args['es_fields'] = self._fields(resource)

        if config.ID_FIELD in lookup:
            try:
                hit = self.es.get(id=lookup[config.ID_FIELD], **args)
            except es_exceptions.ElasticHttpNotFoundError:
                return None

            if not hit['exists']:
                return None

            # Projected responses carry 'fields', full ones '_source'.
            doc = hit.get('fields', hit.get('_source', {}))
            doc['_id'] = hit.get('_id')
            convert_dates(doc, self._dates(resource))
            return doc

        query = {
            'query': {
                'constant_score': {
                    'filter': {
                        'term': lookup
                    }
                }
            }
        }

        try:
            args['size'] = 1
            docs = self._parse_hits(self.es.search(query, **args), resource)
            return docs.first()
        except es_exceptions.ElasticHttpNotFoundError:
            return None

    def find_list_of_ids(self, resource, ids, client_projection=None):
        """Fetch the documents with the given ``ids`` via a multi-get.

        ``client_projection`` is accepted but not used here.
        """
        args = self._es_args(resource)
        args['es_fields'] = self._fields(resource)
        return self._parse_hits(self.es.multi_get(ids, **args), resource)

    def insert(self, resource, doc_or_docs, **kwargs):
        """Index every document in ``doc_or_docs`` and return their ids.

        Each document is updated in place with the index response, then the
        index is refreshed once after all documents were sent.
        """
        ids = []
        kwargs.update(self._es_args(resource))
        for doc in doc_or_docs:
            doc.update(self.es.index(doc=doc, id=doc.get('_id'), **kwargs))
            ids.append(doc['_id'])
        self.es.refresh(self.index)
        return ids

    def update(self, resource, id_, updates):
        """Apply the partial document ``updates`` to document ``id_``."""
        args = self._es_args(resource, refresh=True)
        return self.es.update(id=id_, doc=updates, **args)

    def replace(self, resource, id_, document):
        """Overwrite document ``id_`` with ``document``."""
        args = self._es_args(resource, refresh=True)
        args['overwrite_existing'] = True
        # The client's keyword for the body is ``doc`` (see ``insert``).
        return self.es.index(doc=document, id=id_, **args)

    def remove(self, resource, id_=None):
        """Delete document ``id_``, or every document of the type if falsy.

        A not-found error while deleting everything is ignored.
        """
        args = self._es_args(resource, refresh=True)
        if id_:
            return self.es.delete(id=id_, **args)

        try:
            return self.es.delete_all(**args)
        except es_exceptions.ElasticHttpNotFoundError:
            return None

    def _parse_hits(self, hits, resource):
        """Parse hits response into documents."""
        return ElasticCursor(hits, self._dates(resource))

    def _es_args(self, resource, refresh=None):
        """Get index and doctype args.

        ``refresh`` is only included when truthy.
        """
        datasource = self._datasource(resource)
        args = {
            'index': self.index,
            'doc_type': datasource[0],
        }
        if refresh:
            args['refresh'] = refresh
        return args

    def _fields(self, resource):
        """Get projection fields for given resource as a comma-joined string."""
        datasource = self._datasource(resource)
        return ','.join(datasource[2].keys())

    def _default_sort(self, resource):
        """Return the fourth element of the resource's datasource tuple."""
        datasource = self._datasource(resource)
        return datasource[3]

    def _dates(self, resource):
        """List the date fields of ``resource``.

        Always contains the last-updated and date-created fields, followed by
        every schema field whose type is ``datetime``.
        """
        dates = [config.LAST_UPDATED, config.DATE_CREATED]
        datasource = self._datasource(resource)
        schema = config.DOMAIN[datasource[0]]['schema']
        dates.extend(
            field for field, field_schema in schema.items()
            if field_schema['type'] == 'datetime'
        )
        return dates
Example #4
0
    def save(self, force_insert=False, force_update=False, **kwargs):
        """Save the listing and mirror it into the ``glamazer`` index.

        When the instance already has an id, the indexed document
        ``listings.listing.<id>`` is updated through a script and the model
        is saved afterwards.  Otherwise the model is saved first, a
        notification is created for every follower of the artist, and a new
        document is indexed followed by an index refresh.

        Extra keyword arguments are forwarded to the parent ``save`` instead
        of being discarded.
        """
        es = ElasticSearch(ELASTIC_SEARCH_URL)
        if self.id:
            location = self.get_location()
            location_es = "{0},{1}".format(location.y, location.x)
            # (document field, script parameter) pairs in assignment order.
            assignments = (
                ('listing_id', 'listing'),
                ('artist_id', 'artist'),
                ('artist_avatar', 'artist_avatar'),
                ('title', 'title'),
                ('location', 'location'),
                ('description', 'description'),
                ('get_picture', 'get_picture'),
                ('metadata', 'metadata'),
                ('price', 'price'),
                ('likes', 'likes'),
                ('comments', 'comments'),
                ('tags', 'tags'),
                ('status', 'status'),
                ('style', 'style'),
                ('rating', 'rating'),
            )
            script = ";".join(
                "ctx._source.{0} = {1}".format(field, param)
                for field, param in assignments)
            es.update(
                'glamazer', 'modelresult',
                'listings.listing.{0}'.format(self.id),
                script=script,
                params={
                    'listing': self.id,
                    'artist': self.get_artist_id(),
                    'artist_avatar': self.get_artist_avatar(),
                    'title': self.title,
                    'location': location_es,
                    'description': self.description,
                    'get_picture': self.get_picture(),
                    'metadata': self.metadata,
                    'price': self.price,
                    'likes': self.likes,
                    'comments': self.comments,
                    'tags': self.get_tags(),
                    'status': self.status,
                    'style': self.get_style(),
                    'rating': self.get_rating()
                })
            super(Listing, self).save(force_insert, force_update, **kwargs)
        else:
            # Saved before indexing; self.id is used for the document below.
            super(Listing, self).save(force_insert, force_update, **kwargs)

            artist_user = self.artist.user
            artist_name = artist_user.first_name
            followers = Followers.objects.select_related().filter(artist=self.artist)
            for follower in followers:
                Notification.objects.create(
                    sender=artist_user,
                    receiver=follower.user,
                    time=current_time(),
                    short_text=NOTIFICATIONS_SHORT[10].format(artist=artist_name),
                    long_text=NOTIFICATIONS_LONG[10].format(
                        artist=artist_name, listing=self.title,
                        user_id=self.artist_id, metadata=self.id),
                )

            location = self.get_location()
            location_es = "{0},{1}".format(location.y, location.x)
            es.index(
                'glamazer', 'modelresult',
                {
                    'listing_id': self.id,
                    'artist_id': self.artist_id,
                    'artist_avatar': self.get_artist_avatar(),
                    'title': self.title,
                    'location': location_es,
                    'description': self.description,
                    'get_picture': self.get_picture(),
                    'metadata': self.metadata,
                    'price': self.price,
                    'likes': self.likes,
                    'comments': self.comments,
                    'tags': self.get_tags(),
                    'status': self.status,
                    'style': self.get_style(),
                    'rating': self.get_rating()
                },
                id='listings.listing.{0}'.format(self.id))
            es.refresh('glamazer')
Example #5
0
            except ValueError:
                jq.pop("Runtime", None)
        # remove commas from imdbVotes and convert it to an int
        if "imdbVotes" in jq:
            votes = re.sub(",", "", jq["imdbVotes"])
            try:
                jq["imdbVotes"] = int(votes)
            except ValueError:
                jq.pop("imdbVotes", None)
        # parse Metascore to float
        if "Metascore" in jq:
            try:
                jq["Metascore"] = float(jq["Metascore"])
            except ValueError:
                jq.pop("Metascore", None)
        # parse imdbRating to float
        if "imdbRating" in jq:
            try:
                jq["imdbRating"] = float(jq["imdbRating"])
            except ValueError:
                jq.pop("imdbRating", None)
        metadata = {
            "metadata" : "true"
        }
        for key, value in jq.iteritems():
            #print "key=%s value=%s" % (key, value)
            metadata[key] = jq[key]
    # update
    #print "update=%s" % metadata
    print es.update("prime", "video", mid, doc=metadata)
Example #6
0
    def save(self, force_insert=False, force_update=False, **kwargs):
        """Save the listing and keep its ``glamazer`` index document in sync.

        Existing instances (truthy ``self.id``) have their indexed document
        ``listings.listing.<id>`` rewritten field by field through an update
        script before the model itself is saved.  New instances are saved
        first and then indexed as a fresh document, after which the index is
        refreshed.

        Any additional keyword arguments are passed on to the parent
        ``save`` rather than being dropped.
        """
        es = ElasticSearch(ELASTIC_SEARCH_URL)
        if self.id:
            location = self.get_location()
            location_es = "{0},{1}".format(location.y, location.x)
            # (document field, script parameter) pairs in assignment order.
            assignments = (
                ('listing_id', 'listing'),
                ('artist_id', 'artist'),
                ('artist_avatar', 'artist_avatar'),
                ('artist_name', 'artist_name'),
                ('salon_id', 'salon'),
                ('salon_avatar', 'salon_avatar'),
                ('salon_name', 'salon_name'),
                ('title', 'title'),
                ('location', 'location'),
                ('description', 'description'),
                ('get_picture', 'get_picture'),
                ('metadata', 'metadata'),
                ('gender', 'gender'),
                ('price', 'price'),
                ('currency', 'currency'),
                ('likes', 'likes'),
                ('comments', 'comments'),
                ('tags', 'tags'),
                ('status', 'status'),
                ('style', 'style'),
                ('rating', 'rating'),
            )
            script = ";".join(
                "ctx._source.{0} = {1}".format(field, param)
                for field, param in assignments)
            es.update(
                'glamazer',
                'modelresult',
                'listings.listing.{0}'.format(self.id),
                script=script,
                params={
                    'listing': self.id,
                    'artist': self.get_artist_id(),
                    'artist_avatar': self.get_artist_avatar(),
                    'artist_name': self.get_artist_name(),
                    'salon': self.get_salon_id(),
                    'salon_avatar': self.get_salon_avatar(),
                    'salon_name': self.get_salon_name(),
                    'title': self.title,
                    'location': location_es,
                    'description': self.description,
                    'get_picture': self.get_picture(),
                    'metadata': self.metadata,
                    'gender': self.gender,
                    'price': self.price,
                    'currency': self.currency,
                    'likes': self.likes,
                    'comments': self.comments,
                    'tags': self.get_tags(),
                    'status': self.status,
                    'style': self.get_style(),
                    'rating': self.get_rating()
                })
            super(Listing, self).save(force_insert, force_update, **kwargs)
        else:
            # Saved before indexing; self.id is used for the document below.
            super(Listing, self).save(force_insert, force_update, **kwargs)

            location = self.get_location()
            location_es = "{0},{1}".format(location.y, location.x)
            document = {
                'listing_id': self.id,
                'artist_id': self.artist_id,
                'artist_avatar': self.get_artist_avatar(),
                'artist_name': self.get_artist_name(),
                'salon_id': self.get_salon_id(),
                'salon_avatar': self.get_salon_avatar(),
                'salon_name': self.get_salon_name(),
                'title': self.title,
                'location': location_es,
                'description': self.description,
                'get_picture': self.get_picture(),
                'metadata': self.metadata,
                'gender': self.gender,
                'price': self.price,
                'currency': self.currency,
                'likes': self.likes,
                'comments': self.comments,
                'tags': self.get_tags(),
                'status': self.status,
                'style': self.get_style(),
                'rating': self.get_rating()
            }
            es.index('glamazer',
                     'modelresult',
                     document,
                     id='listings.listing.{0}'.format(self.id))
            es.refresh('glamazer')
Example #7
0
            }
        }
    }
    #print query
    result = es.search(query,size=10000,index=index)
    for r in result['hits']['hits']:
        #print r['_source']['pub_content']
        query2 = {'query':{
                "bool":{
                    "must":[
                        {"match_phrase" : {"art_content":r['_source']['art_content']}}
                    ],
                    "must_not":{
                        "match" : {"_id":r['_id']}
                    }
                }
            }
        }
        partialResult = es.search(query2,size=paginationSize,index=index)
        print partialResult['hits']['total']
        if partialResult['hits']['total'] > 0:
            doc = {}
            doc['art_repeated'] = 1
            es.update(index,index,r['_id'],doc=doc)
        else:
            doc = {}
            doc['art_repeated'] = 0
            es.update(index,index,r['_id'],doc=doc)
        #for pr in partialResult['hits']['hits']:
            #print pr['_source']['art_content']
Example #8
0
    def save(self, force_insert=False, force_update=False, **kwargs):
        """Store the listing in the database and in the ``glamazer`` index.

        If ``self.id`` is truthy the indexed document
        ``listings.listing.<id>`` is updated by script and the model is then
        saved.  If not, the model is saved first, a new document is indexed
        under the id the save produced, and the index is refreshed.

        Keyword arguments beyond ``force_insert`` / ``force_update`` are
        handed through to the parent ``save`` so they are not lost.
        """
        es = ElasticSearch(ELASTIC_SEARCH_URL)
        if self.id:
            location = self.get_location()
            location_es = "{0},{1}".format(location.y, location.x)
            # (document field, script parameter) pairs in assignment order.
            assignments = (
                ('listing_id', 'listing'),
                ('artist_id', 'artist'),
                ('artist_avatar', 'artist_avatar'),
                ('artist_name', 'artist_name'),
                ('salon_id', 'salon'),
                ('salon_avatar', 'salon_avatar'),
                ('salon_name', 'salon_name'),
                ('title', 'title'),
                ('location', 'location'),
                ('description', 'description'),
                ('get_picture', 'get_picture'),
                ('metadata', 'metadata'),
                ('gender', 'gender'),
                ('price', 'price'),
                ('currency', 'currency'),
                ('likes', 'likes'),
                ('comments', 'comments'),
                ('tags', 'tags'),
                ('status', 'status'),
                ('style', 'style'),
                ('rating', 'rating'),
            )
            script = ";".join(
                "ctx._source.{0} = {1}".format(field, param)
                for field, param in assignments)
            es.update('glamazer',
                      'modelresult',
                      'listings.listing.{0}'.format(self.id),
                      script=script,
                      params={
                          'listing': self.id,
                          'artist': self.get_artist_id(),
                          'artist_avatar': self.get_artist_avatar(),
                          'artist_name': self.get_artist_name(),
                          'salon': self.get_salon_id(),
                          'salon_avatar': self.get_salon_avatar(),
                          'salon_name': self.get_salon_name(),
                          'title': self.title,
                          'location': location_es,
                          'description': self.description,
                          'get_picture': self.get_picture(),
                          'metadata': self.metadata,
                          'gender': self.gender,
                          'price': self.price,
                          'currency': self.currency,
                          'likes': self.likes,
                          'comments': self.comments,
                          'tags': self.get_tags(),
                          'status': self.status,
                          'style': self.get_style(),
                          'rating': self.get_rating()
                      })
            super(Listing, self).save(force_insert, force_update, **kwargs)
        else:
            # Saved before indexing; self.id is used for the document below.
            super(Listing, self).save(force_insert, force_update, **kwargs)

            location = self.get_location()
            location_es = "{0},{1}".format(location.y, location.x)
            document = {
                'listing_id': self.id,
                'artist_id': self.artist_id,
                'artist_avatar': self.get_artist_avatar(),
                'artist_name': self.get_artist_name(),
                'salon_id': self.get_salon_id(),
                'salon_avatar': self.get_salon_avatar(),
                'salon_name': self.get_salon_name(),
                'title': self.title,
                'location': location_es,
                'description': self.description,
                'get_picture': self.get_picture(),
                'metadata': self.metadata,
                'gender': self.gender,
                'price': self.price,
                'currency': self.currency,
                'likes': self.likes,
                'comments': self.comments,
                'tags': self.get_tags(),
                'status': self.status,
                'style': self.get_style(),
                'rating': self.get_rating()
            }
            es.index('glamazer', 'modelresult', document,
                     id='listings.listing.{0}'.format(self.id))
            es.refresh('glamazer')
class ElasticSearchDataStore(datastore.DataStore):
    """Implements the API."""

    def __init__(self, index_list):
        """Connect to the Elasticsearch server and remember the indices.

        Args:
            index_list -- indices passed to every search/get/update call
        """
        # Connect to the Elasticsearch server.
        self.client = ElasticSearch('http://%s:%s/' % (ELASTICSEARCH_SERVER_IP,
                                                       ELASTICSEARCH_PORT))
        # TODO Refactor this to not need the index list at this stage.
        self.index_list = index_list

    def search(self, sketch, query, filters):
        """Search ElasticSearch. This will take a query string from the UI
        together with a filter definition. Based on this it will send the
        search request to elasticsearch and get result back.

        The ``time_start`` filter takes precedence over ``star``; when
        neither is set a plain query-string search is run.  Results are
        always sorted by ascending ``datetime`` and capped at 500 hits.

        Args:
            sketch -- string, sketch ID
            query -- string, query string
            filters -- dict, Dictionary containing filters to apply

        Returns:
            The response of the client's search call
        """
        if filters.get("time_start", None):
            datetime_range = {"gte": filters['time_start']}
            # The upper bound is optional: a missing ``time_end`` leaves the
            # range open-ended instead of raising KeyError.
            if "time_end" in filters:
                datetime_range["lte"] = filters['time_end']
            body = {
                "query": {
                    "query_string": {
                        "query": query
                    }
                },
                "filter": {
                    "range": {
                        "datetime": datetime_range
                    }
                }
            }
        elif filters.get("star", None):
            # Only events carrying the star label for this sketch.
            label_terms = [
                {"term": {"timesketch_label.name": "__ts_star"}},
                {"term": {"timesketch_label.sketch": str(sketch)}},
            ]
            body = {
                "query": {
                    "match_all": {}
                },
                "filter": {
                    "nested": {
                        "path": "timesketch_label",
                        "filter": {
                            "bool": {
                                "must": label_terms
                            }
                        }
                    }
                }
            }
        else:
            body = {
                "query": {
                    "query_string": {
                        "query": query
                    }
                }
            }
        body["sort"] = {"datetime": "asc"}

        return self.client.search(body, index=self.index_list,
                                  doc_type="plaso_event", size=500)

    def get_single_event(self, event_id):
        """Get a single event document from elasticsearch.

        The lookup uses the first index of ``index_list`` only.

        Args:
            event_id -- string, event ID

        Returns:
            The response of the client's get call
        """
        return self.client.get(index=self.index_list[0],
                               doc_type="plaso_event", id=event_id)

    def add_label_to_event(self, event, sketch, user, label, toggle=False):
        """Add label to an event document in ElasticSearch.

        Args:
            event -- string, event ID
            sketch -- string, sketch ID
            user -- string, user ID
            label -- string, the label to apply
            toggle -- Bool, Toggle label or create a new one

        Returns:
            None

        In order for this to work, we need to add a mapping for this nested
        document. This needs to be done when the index is first created.
        mapping = {
            "plaso_event": {
                "properties": {
                    "timesketch_label": {
                        "type": "nested"
                    }
                }
            }
        }
        """
        # NOTE(review): the whole index list is passed here, whereas
        # get_single_event uses index_list[0] -- confirm which is intended.
        doc = self.client.get(self.index_list, "plaso_event", event)
        try:
            doc['_source']['timesketch_label']
        except KeyError:
            # No label list yet: create an empty one so the script below can
            # call contains()/remove()/+= on it.
            doc = {"timesketch_label": []}
            self.client.update(self.index_list, "plaso_event", event, doc=doc)

        if toggle:
            # Remove the label when present, add it otherwise.
            script_string = "if(ctx._source.timesketch_label.contains"\
                            "(timesketch_label)) {ctx._source.timesketch_label"\
                            ".remove(timesketch_label)} else {ctx._source."\
                            "timesketch_label += timesketch_label}"
        else:
            # Add the label only when it is not there yet.
            script_string = "if( ! ctx._source.timesketch_label.contains"\
                            "(timesketch_label)) {ctx._source.timesketch_label"\
                            "+= timesketch_label}"
        script = {
            "script": script_string,
            "params": {
                "timesketch_label": {
                    "name": label, "user": user, "sketch": sketch
                }
            }
        }
        self.client.update(self.index_list, "plaso_event", event, script)
Example #10
0
            try:
                jq["Runtime"] = int(runtime)
            except ValueError:
                jq.pop("Runtime", None)
        # remove commas from imdbVotes and convert it to an int
        if "imdbVotes" in jq:
            votes = re.sub(",", "", jq["imdbVotes"])
            try:
                jq["imdbVotes"] = int(votes)
            except ValueError:
                jq.pop("imdbVotes", None)
        # parse Metascore to float
        if "Metascore" in jq:
            try:
                jq["Metascore"] = float(jq["Metascore"])
            except ValueError:
                jq.pop("Metascore", None)
        # parse imdbRating to float
        if "imdbRating" in jq:
            try:
                jq["imdbRating"] = float(jq["imdbRating"])
            except ValueError:
                jq.pop("imdbRating", None)
        metadata = {"metadata": "true"}
        for key, value in jq.iteritems():
            #print "key=%s value=%s" % (key, value)
            metadata[key] = jq[key]
    # update
    #print "update=%s" % metadata
    print es.update("prime", "video", mid, doc=metadata)
Example #11
0
class ElasticConnector(Connector):
    """
    Connector that stores its data in an elasticsearch database.
    """
    # Maximum number of hits requested by get_instances_to_update.
    MAX_SIZE = 1000

    def __init__(self, database, host='http://localhost:9200/'):
        """
        Connects to ``host`` and makes sure the index ``database`` exists.
        """
        self.client = ElasticSearch(host)
        self.index = database
        self.create_index()

    def query_to_id(self, query):
        """
        Returns id representation of a specified query.
        Every key and value is joined with underscores and slashes are
        replaced by underscores.
        This is a temporary method as a replacement of elasticsearch
        query search.
        """
        return "_".join(str(k) + "_" + str(v)
                        for k, v in query.items()).replace("/", "_")

    def create_index(self):
        """
        Creates the configured index; any error raised while doing so
        (such as the index having already been created) is ignored.
        """
        try:
            self.client.create_index(self.index)
        except Exception:
            # Best-effort: creation failures are deliberately not reported.
            pass

    def set_dynamic_mapping(self, collection):
        """
        Sets dynamic mapping for a specified document type.
        """
        self.client.put_mapping(self.index, collection, {'dynamic': True})

    def save_block(self, block):
        """
        Saves operation info in a database.
        """
        super().save_block(block)
        # update_by_query serialises the block itself.
        self.update_by_query(block.get_collection(), block.get_query(), block)

    def update_by_query(self, collection, query, document):
        """
        Sets dynamic mapping for a specified collection, then indexes the
        document under an id derived from ``query``.
        Any error is printed and otherwise ignored.
        """
        try:
            self.set_dynamic_mapping(collection)
            document_body = document.to_dict()
            # The id is passed separately, so it must not be in the body.
            document_body.pop('_id', None)
            self.client.index(self.index,
                              collection,
                              document_body,
                              id=self.query_to_id(query))
        except Exception as e:
            print(e)

    def find_last_block(self):
        """
        Finds last block index as the value field of the
        ``height_all_tsx`` document in the status collection.
        Returns 0 when that document is not found.
        """
        try:
            document = self.client.get(self.index, 'status',
                                       'height_all_tsx')['_source']
            return document['value']
        except ElasticHttpNotFoundError:
            return 0

    def update_last_block(self, last_block):
        """
        Updates last block index as the value field of the
        ``height_all_tsx`` document in the status collection.
        """
        self.client.index(self.index,
                          'status', {'value': last_block},
                          id='height_all_tsx')

    def save_instance(self, instance):
        """
        Saves account or comment object.
        """
        self.update_by_query(instance.get_collection(), instance.get_query(),
                             instance)

    def get_instances_to_update(self, collection):
        """
        Finds and returns all dictionaries with objects that should be
        updated (at most MAX_SIZE), each extended with its ``_id``.
        """
        hits = self.client.search("need_update:true",
                                  index=self.index,
                                  doc_type=collection,
                                  size=self.MAX_SIZE)['hits']['hits']
        return [{**hit['_source'], "_id": hit["_id"]} for hit in hits]

    def update_instances(self, collection, instances):
        """
        Resets need_update flag for all instances in a list by their ids
        in the _id field.
        """
        for instance in instances:
            self.client.update(self.index,
                               collection,
                               instance["_id"],
                               doc={'need_update': False})