Exemple #1
1
def delete_all():
    """Delete every document (first 10000 hits) from the configured index."""
    client = Elasticsearch()
    body = {
        "from": 0,
        "size": 10000,
        "query": {"query_string": {"query": "*"}},
    }
    # Match-all query; then delete each returned hit individually.
    for hit in client.search(body=body).get("hits").get("hits"):
        print(hit["_type"])
        print(hit["_id"])
        client.delete(index=config.db_name, doc_type=hit["_type"], id=hit["_id"])
Exemple #2
1
 def es_delete(self):
     """Remove this record's document from the familias index.

     A document that was never indexed (or is already gone) is not an
     error, so the client's NotFoundError is silently ignored.
     """
     es = Elasticsearch()
     try:
         es.delete(index=ES_FAMILIAS_INDEX, doc_type=ES_FAMILIAS_DOC_TYPE, id=self.cod_material)
     # BUG FIX: the original bare ``except:`` swallowed every exception
     # (including SystemExit/KeyboardInterrupt); the accompanying @todo
     # asked for exactly this narrowing.
     except NotFoundError:
         pass
Exemple #3
1
def delete():
    es = Elasticsearch([{'host':'219.224.134.214','port':9202}])
    query_body = {"size":100000,"query":{ "filtered": {
        "filter":{"range":{"publish_time":{"gte": 1520380800,"lte": 1528329600}}}
    }}}
    res = es.search(index="announcement", doc_type="basic_info", body=query_body,request_timeout=100)
    hits = res['hits']['hits']
    print len(hits)
    num = 0
    for hit in hits:
        es.delete(index="announcement", doc_type="basic_info", id=hit['_id'])
        if num %1000 == 0:
            print num
        num += 1
    def _elastic(self, doc_id=None, doc=None, option='create'):
        """Run a single document operation against Elasticsearch.

        option:
            init:   create the index itself (only run when config.ini sets init=True).
            create: create the document; logged failure if it already exists.
            update: update the document; logged failure if it does not exist.
            delete: delete the document; logged failure if it does not exist.
        """
        # BUG FIX: ``doc={}`` was a shared mutable default argument; use the
        # None sentinel and build a fresh dict per call instead.
        if doc is None:
            doc = {}

        esclient = Elasticsearch([self.elastic])

        status = 'Success !'

        if option == 'create':
            try:
                esclient.create(
                    index=self.elastic['index'],
                    doc_type=self.elastic['type'],
                    id=doc_id,
                    body=doc,
                )
            except ConflictError:
                status = 'Fail(existsd) !'

        elif option == 'update':
            try:
                esclient.update(
                    index=self.elastic['index'],
                    doc_type=self.elastic['type'],
                    id=doc_id,
                    body={'doc': doc},
                )
            except NotFoundError:
                status = 'Fail(not existsd) !'

        elif option == 'delete':
            try:
                esclient.delete(
                    index=self.elastic['index'],
                    doc_type=self.elastic['type'],
                    id=doc_id,
                )
            except NotFoundError:
                status = 'Fail(not existsd) !'

        elif option == 'init':
            try:
                IndicesClient(esclient).create(
                    index=self.elastic['index'],
                    body=doc,
                )
            except RequestError:
                status = 'Fail(existsd) !'

        # One log line per operation, whatever the outcome.
        self.logger.record('Sync@%s < %s-%s-%s > %s' % (
            option,
            self.elastic['index'],
            self.elastic['type'],
            doc_id,
            status,
        ))
Exemple #5
0
class StreamingIndexer(TwythonStreamer):
    """Twitter streamer that mirrors incoming tweets into an Elasticsearch index."""

    def __init__(self, consumer_key=None, consumer_secret=None,
                 access_token=None, access_token_secret=None,
                 es_host=None, es_port=None, es_index=None):
        super(StreamingIndexer, self).__init__(consumer_key, consumer_secret,
                                               access_token,
                                               access_token_secret)
        self._es = Elasticsearch([{'host': es_host, 'port': es_port}])
        self._index = es_index

    def on_success(self, tweet):
        # A deletion notice: drop the referenced tweet from the index.
        if 'delete' in tweet:
            self._es.delete(self._index, 'tweet', tweet['delete']['status']['id'])
            return

        # Index the original tweet rather than the retweet wrapper.
        tweet = tweet.get('retweeted_status', tweet)

        # Tag Guardian links so they can be filtered on later.
        for url in tweet['entities']['urls']:
            if 'theguardian.com' in url['expanded_url']:
                url['domain'] = 'theguardian.com'

        self._es.index(index=self._index, doc_type='tweet',
                       id=tweet['id_str'], body=tweet)
class ElasticSearch(TableStorage):
    """ElasticSearch provider for TableStorage."""

    def __init__(self, config: ElasticSearchConfig):
        self._es_instance = Elasticsearch(config.access_url,
                                          http_auth=('elastic',
                                                     config.access_key))
        self._doc_type = 'entries'

    def write(self, resource):
        """Write resource to table.

        :param resource: Expecting Resource object
            (see Common.Contracts.Resource)
        :return: None
        """
        entry = resource.to_dict()
        # Deterministic document id: the same resource id always maps to
        # the same UUID, so re-writes overwrite rather than duplicate.
        doc_id = str(uuid.uuid3(uuid.NAMESPACE_DNS, entry['id']))
        self._es_instance.index(index=entry['location'],
                                doc_type=self._doc_type,
                                id=doc_id,
                                body=entry)

    def query(self, partition_key, row_key):
        """Get entry with specified partition and row keys.

        :param partition_key: Partition key for entry
        :param row_key: Row key for entry
        :return: Entity if found, None otherwise
        """
        return self._es_instance.get(index=partition_key,
                                     doc_type=self._doc_type,
                                     id=row_key)

    def query_list(self):
        """Get entities from table.

        :return: List of entities from table
        """
        return self._es_instance.search("*")

    def delete(self, partition_key, row_key):
        """Delete entry with specified partition and row keys.

        :param partition_key: Partition key for entry
        :param row_key: Row key for entry
        :return: None
        """
        self._es_instance.delete(index=partition_key,
                                 doc_type=self._doc_type,
                                 id=row_key)

    @staticmethod
    def create():
        """Initialize ElasticSearch service.

        :return: ElasticSearch service object
        """
        return ElasticSearch(ElasticSearchConfig())
Exemple #7
0
def delete_es(video):
    """Remove *video*'s document from the Elasticsearch index.

    Returns the delete response, or None when the cluster is unreachable
    or a transport error occurs.
    """
    client = Elasticsearch(
        ES_URL,
        timeout=ES_TIMEOUT,
        max_retries=ES_MAX_RETRIES,
        retry_on_timeout=True,
    )
    if not client.ping():
        return
    try:
        # ES 7 dropped mapping types, so doc_type is only sent to older clusters.
        if ES_VERSION == 7:
            result = client.delete(index=ES_INDEX,
                                   id=video.id,
                                   refresh=True,
                                   ignore=[400, 404])
        else:
            result = client.delete(
                index=ES_INDEX,
                doc_type="pod",
                id=video.id,
                refresh=True,
                ignore=[400, 404],
            )
        if DEBUG:
            logger.info(result)
        return result
    except TransportError as e:
        logger.error("An error occured during delete video : %s-%s : %s" %
                     (e.status_code, e.error, e.info))
Exemple #8
0
class ElasticsearchModule(AbstractModule):
    """
    Override the elastic API. (Not really usefull for the moment)
    """
    def __init__(self, config_parser):
        # Connection settings read from the [elasticsearch] config section.
        self.SETTINGS = {
            "host": config_parser.get('elasticsearch', 'host'),
            "port": config_parser.get('elasticsearch', 'port')
        }
        # NOTE(review): SETTINGS is never passed to the client below, so the
        # configured host/port are ignored and the client defaults are used —
        # confirm whether that is intentional.
        self.es_stream = Elasticsearch()
        # Simple operation counters, exposed via server_information.
        self.insert_number = 0
        self.update_number = 0
        self.delete_number = 0

    def insert(self, index, doc_type, id, doc):
        # Positional call order is (index, doc_type, body, id) — assumes the
        # legacy pre-7.x client signature; TODO confirm client version.
        self.es_stream.index(index, doc_type, doc, id)
        self.insert_number += 1

    def delete(self, index, doc_type, id):
        # Remove one document by id.
        self.es_stream.delete(index, doc_type, id)
        self.delete_number += 1

    def update(self, index, doc_type, id, doc):
        # Re-indexing with the same id overwrites the stored document.
        self.insert(index, doc_type, id, doc)
        self.update_number += 1

    @property
    def server_information(self):
        # Snapshot of connectivity plus the operation counters.
        return {
            'elastic_avalaible': self.es_stream.ping(),
            'insert_number': self.insert_number,
            'update_number': self.update_number,
            'delete_number': self.delete_number
        }
    def unindexer(self, handle, ring=None, idx=None):
        """Remove an index, a mapping type, or a single document.

        handle/ring/idx correspond to index/doc_type/id; the most specific
        combination supplied wins. Returns a dict with the JSON payload and
        the template name to render.
        """
        # Connect to Elastic Search Node
        es = Elasticsearch([ES_NODE])

        out = {}
        out['unindexed'] = []

        # BUG FIX: ``i`` was left unbound when ``handle`` was falsy, which
        # made the log/append lines below raise NameError.
        i = None

        if handle and ring and idx:
            # Single document; missing docs/indices are not errors.
            es.delete(index=handle, doc_type=ring, id=idx, ignore=[400, 404])
            i = '%s/%s/%s' % (handle, ring, idx)
        elif handle and ring:
            #es.delete(index=handle,doc_type=ring, ignore=[400, 404]) This doesnt work
            # Could not make elasticsearch_py delete a doc_type only
            requests.delete('%s/%s/%s' % (ES_NODE, handle, ring))
            i = '%s/%s' % (handle, ring)
        elif handle:
            es.indices.delete(index=handle, ignore=[400, 404])
            i = '%s' % (handle)

        self.lggr.info('UnIndexing:%s' % i)

        out['unindexed'].append(i)

        d = {}
        d['json_out'] = json.dumps(out)
        d['template'] = 'base_json.html'
        return d
Exemple #10
0
def instaimport(searchindex, thisdoctype, newsbody):
    """Index an Instagram document, recreating it on conflict.

    NOTE: ``searchindex`` is kept for interface compatibility but is always
    overridden with 'instagram' (as in the original code). Returns 'ok'.
    """
    # Index by Elastic
    es = Elasticsearch(['media-audit.com'],
                       http_auth=('elastic', 'changeme'),
                       port=9200)

    if newsbody:
        searchindex = 'instagram'
        try:
            es.create(index=searchindex,
                      doc_type=thisdoctype,
                      body=newsbody,
                      id=newsbody['id'])

        # BUG FIX: narrowed from a bare ``except:`` so SystemExit and
        # KeyboardInterrupt are no longer swallowed.
        except Exception:
            # create() refuses to overwrite: drop the old doc and retry once.
            es.delete(index=searchindex,
                      doc_type=thisdoctype,
                      id=newsbody['id'])
            print('Delete Id =' + newsbody['id'])
            try:
                es.create(index=searchindex,
                          doc_type=thisdoctype,
                          body=newsbody,
                          id=newsbody['id'])
                print('Id = ' + newsbody['id'] + ' was upload \n')
            except Exception:
                es.delete(index=searchindex,
                          doc_type=thisdoctype,
                          id=newsbody['id'])
                print("Skip")

    return 'ok'
Exemple #11
0
def delete():
    es = Elasticsearch([{'host': '219.224.134.214', 'port': 9202}])
    query_body = {
        "size": 100000,
        "query": {
            "filtered": {
                "filter": {
                    "range": {
                        "publish_time": {
                            "gte": 1520380800,
                            "lte": 1528329600
                        }
                    }
                }
            }
        }
    }
    res = es.search(index="announcement",
                    doc_type="basic_info",
                    body=query_body,
                    request_timeout=100)
    hits = res['hits']['hits']
    print len(hits)
    num = 0
    for hit in hits:
        es.delete(index="announcement", doc_type="basic_info", id=hit['_id'])
        if num % 1000 == 0:
            print num
        num += 1
class ESModule:
    """Convenience upsert/delete operations for Elasticsearch."""

    def __init__(self):
        self.es = Elasticsearch(ES_ADDRESS)

    def update(self, index, my_id, json_data, doc_type='data'):
        """Upsert one document through the bulk API.

        :param index: target index
        :param my_id: document id
        :param json_data: JSON payload to write
        :param doc_type: mapping type, defaults to 'data'
        :return: None
        """
        action = {'update': {'_id': my_id, '_type': doc_type, '_index': index}}
        # doc_as_upsert: create the document when it does not exist yet.
        payload = {'doc': json_data, 'doc_as_upsert': True}
        self.es.bulk([action, payload])

    def delete(self, index, my_id, doc_type='data'):
        """Delete one document.

        :param index: target index
        :param my_id: document id
        :param doc_type: mapping type, defaults to 'data'
        :return: None
        """
        self.es.delete(index=index, doc_type=doc_type, id=my_id)
Exemple #13
0
class NewsDelInfo(object):
    """Consumes rowkeys from redis and removes the matching news docs from ES."""

    def __init__(self):
        self.hbase_con = HbaseInfoTask()
        self.redis_con = RedisTools()
        self.es = Elasticsearch(ES_ADDR, ignore=404)

    def es_ping(self):
        # Reconnect when the node stops answering pings.
        if not self.es.ping():
            self.es = Elasticsearch(ES_ADDR, ignore=404)

    def run(self):
        # Endless consumer loop: one rowkey per iteration.
        while True:
            rowkey = self.redis_con.get_yy_rowkey("es:news:del:info")
            _id = trans_md5(rowkey)
            self.es_ping()
            try:
                if self.es.exists(index="xw_info", doc_type="sino", id=_id):
                    self.es.delete(index="xw_info", doc_type="sino", id=_id)
            except Exception as e:
                logging.error("news info delete error %s" % str(e))
                # One blind retry after logging the failure.
                if self.es.exists(index="xw_info", doc_type="sino", id=_id):
                    self.es.delete(index="xw_info", doc_type="sino", id=_id)
Exemple #14
0
class ElasticsearchTest(unittest.TestCase):
    """Integration tests against a local Elasticsearch node on :9200."""

    def setUp(self) -> None:
        self.elasticsearch = Elasticsearch(host='localhost', port=9200)
        self.index = "elasticsearch_test"
        self.data = {"test": "elasticsearch_service"}

    def test_ES_connection(self):
        # The node answers on its HTTP port when it is up.
        res = requests.get('http://localhost:9200')
        self.assertEqual(200, res.status_code)

    def test_service_running(self):
        ServiceV().validate('isrunning', ["elasticsearch"])

    def test_insert_data(self):
        # Insert data, then clean up the document.
        res = self.elasticsearch.index(index=self.index, id=1, body=self.data)
        self.assertEqual(res['result'], "created")
        self.elasticsearch.delete(index=self.index, id=1)

    def test_get_data(self):
        # Round-trip: index then fetch by id, then clean up.
        self.elasticsearch.index(index=self.index, id=1, body=self.data)
        res = self.elasticsearch.get(index=self.index, id=1)
        self.assertIs(res['found'], True)
        self.elasticsearch.delete(index=self.index, id=1)

    def delete_data(self):
        # NOTE: not prefixed with test_, so unittest does not collect it.
        self.elasticsearch.index(index=self.index, id=1, body=self.data)
        # BUG FIX: deleting a whole index is an indices-API operation;
        # Elasticsearch.delete() requires a document id and its response
        # carries no 'acknowledged' key, so the original call could never
        # satisfy this assertion.
        res = self.elasticsearch.indices.delete(index=self.index)
        self.assertIs(res['acknowledged'], True)
def deleteESItem(elasticsearchDomain, documentId):
    """Delete one textract/document entry from an AWS-hosted Elasticsearch domain."""
    if not documentId:
        return

    # Sign requests with the current boto3 session credentials (SigV4).
    session = boto3.Session()
    credentials = session.get_credentials()
    awsauth = AWS4Auth(credentials.access_key,
                       credentials.secret_key,
                       session.region_name,
                       'es',
                       session_token=credentials.token)

    es = Elasticsearch(hosts=[{'host': elasticsearchDomain, 'port': 443}],
                       http_auth=awsauth,
                       use_ssl=True,
                       verify_certs=True,
                       connection_class=RequestsHttpConnection)

    # Only delete (and report) when the document actually exists.
    if es.exists(index="textract", doc_type="document", id=documentId):
        es.delete(index="textract", doc_type="document", id=documentId)
        print("Deleted document: {}".format(documentId))
Exemple #16
0
class ElasticSearchUtils(object):
    """Small facade over the configured Elasticsearch cluster."""

    def __init__(self):
        self.es = Elasticsearch(hosts=[{
            'host': ELASTIC_SEARCH_CONFIG['host'],
            'port': ELASTIC_SEARCH_CONFIG['port']
        }])

    def insert(self, index, doc_type, body, id=None):
        """Insert a document.

        :param index: index name
        :param doc_type: document type name
        :param body: document content
        :param id: optional document id
        :return:
        """
        self.es.index(index, doc_type, body, id=id)

    def count(self, index_name):
        """Return the total document count for *index_name*."""
        return self.es.count(index=index_name)

    def delete(self, index_name, doc_type, id):
        """Delete one specific document from *index_name*."""
        self.es.delete(index=index_name, doc_type=doc_type, id=id)

    def get(self, index_name, doc_type, id):
        """Fetch a document by its id.

        :param index_name: index name
        :param doc_type: document type name
        :param id: document id
        :return: the stored document
        """
        return self.es.get(index_name, doc_type, id=id)

    def search(self, index_name, doc_type, body, size=10):
        """Search with *body*; return at most *size* hits, or None on error.

        :param index_name: index name
        :param doc_type: document type name
        :param body: query body
        :param size: maximum number of hits to return
        """
        try:
            hits = self.es.search(index=index_name,
                                  doc_type=doc_type,
                                  body=body)['hits']['hits']
            return hits[:size]
        except Exception as err:
            logger.info(err)
Exemple #17
0
def set_previous_config():
    """Seed a known bandit config document, yield it, then remove it (fixture)."""
    es = Elasticsearch(hosts=os.environ['ES_HOSTS'].split(','), maxsize=1)
    previous = {
        "weights": {
            "desktop": {
                "1": 0.499,
                "2": 0.501,
                "updated": "2017-12-04 03:10:47"
            },
            "mobile": {}
        }
    }
    es.index(index='bandit_pio',
             doc_type='bandit_config',
             id=0,
             body=json.dumps(previous))

    yield previous

    # Teardown: remove the seeded document again.
    es.delete(index='bandit_pio', doc_type='bandit_config', id=0)
Exemple #18
0
    def deleteData(urlCrawl, dataDate, u_id):
        """Delete the url_jobs document *u_id* from the date-stamped gsp index.

        Returns a JSON success payload with HTTP 200 and a content-type header.
        """
        es = Elasticsearch([{
            'host': app.config['ELASTICSEARCH_URI'],
            'port': app.config['ELASTICSEARCH_PORT']
        }])
        # One index per day, e.g. gsp-2018.06.07.
        index_name = 'gsp-{0}'.format(dataDate)
        try:
            es.delete(index=index_name, doc_type="url_jobs", id=u_id)
        except Exception as e:
            raise e

        return json.dumps({'success': True}), 200, {
            'ContentType': 'application/json'
        }
class ElasticService():
    """Minimal wrapper over an HTTPS Elasticsearch connection."""

    def __init__(self, host, port, username, password):
        super().__init__()
        # Self-signed certificates are expected; silence urllib3 warnings.
        requests.packages.urllib3.disable_warnings()
        try:
            log.info(f"connecting to elastic on host: {host}")
            self.es = Elasticsearch(f"https://{host}:{port}",
                                    http_auth=(username, password),
                                    verify_certs=False)
            # Fail fast when the cluster is unreachable.
            self.es.info()
        except Exception as err:
            log.error(err)
            raise

    def create_index(self, index_name):
        """Create the index; an 'already exists' (HTTP 400) reply is ignored."""
        try:
            print("creating index if not exists")
            self.es.indices.create(index=index_name, ignore=400)
        except Exception as err:
            log.error(err)

    def create_doc(self, index_name, id, body):
        """Create one document; errors are logged, not raised."""
        try:
            log.info("creating doc in elastic")
            self.es.create(index=index_name, id=id, body=body)
        except Exception as err:
            log.error(err)

    def delete_doc(self, index_name, id):
        """Delete one document; errors are logged, not raised."""
        try:
            log.info("deleting doc from elastic")
            self.es.delete(index=index_name, id=id)
        except Exception as err:
            log.error(err)
Exemple #20
0
class SearchEngine():
    """Facade over Elasticsearch that no-ops when the cluster is unreachable."""

    def __init__(self, config) -> None:
        endpoint = config['URL'] if config['URL'] else 'localhost:9200'
        try:
            self.search_engine = Elasticsearch([endpoint])
        except Exception as err:
            print(f'Ooops! Search engine error: {err}')

    def is_connected(self):
        return self.search_engine.ping()

    def index(self, index, id, body):
        if not self.is_connected():
            return
        self.search_engine.index(index=index, id=id, body=body)

    def delete(self, index, id):
        if not self.is_connected():
            return
        self.search_engine.delete(index=index, id=id)

    def delete_index(self, index):
        if not self.is_connected():
            return
        self.search_engine.indices.delete(index=index)

    def search(self, index, body):
        if not self.is_connected():
            return None
        return self.search_engine.search(index=index, body=body)
Exemple #21
0
def update_index():
    """Sync the 'employment' index with the Service table.

    Published services are upserted; unpublished and orphaned documents
    are deleted (400/404 responses ignored).
    """
    es_settings = getattr(settings, "ELASTICSEARCH", "")
    es = Elasticsearch([es_settings])
    published_ids = set()
    for service in Service.objects.all():
        if service.publish:
            ser = ServiceSerializer(service)
            # doc_as_upsert: create the document when it does not exist yet.
            es.update(index='employment',
                      doc_type='employment_service',
                      id=service.id,
                      body={
                          'doc': ser.data,
                          'doc_as_upsert': True
                      })
            published_ids.add(service.id)
        else:
            es.delete(index='employment',
                      doc_type='employment_service',
                      id=service.id,
                      ignore=[400, 404])

    # Remove documents whose service is no longer published.
    # NOTE(review): search() without a size uses the server default page
    # (10 hits), so stale documents beyond the first page are never
    # cleaned up — confirm whether a scroll/size parameter is needed.
    for doc in es.search(index="employment")['hits']['hits']:
        if not int(doc['_id']) in published_ids:
            print(doc['_id'])
            es.delete(index='employment',
                      doc_type='employment_service',
                      id=int(doc['_id']),
                      ignore=[400, 404])
class ElasticSearchTeamMemberRepository(TeamMemberRepository):
    """TeamMemberRepository backed by the 'pits' Elasticsearch index."""

    def __init__(self):
        self.es = Elasticsearch(hosts=[{'host': 'elasticsearch'}])

    def delete_by_id(self, id):
        self.es.delete(index="pits", doc_type="_doc", id=id)

    def update(self, id, team_member):
        # Indexing with an existing id overwrites the stored document.
        self.es.index(index="pits", doc_type="_doc", id=id,
                      body=json.dumps(team_member.__dict__))

    def add(self, id, team_member):
        self.es.index(index="pits", doc_type="_doc", id=id,
                      body=json.dumps(team_member.__dict__))

    def get_all(self):
        # match_all query; copy the document id into each returned source.
        result = self.es.search(index="pits", doc_type="_doc",
                                body={"query": {"match_all": {}}})
        members = []
        for hit in result['hits']['hits']:
            source = hit['_source']
            source['id'] = hit['_id']
            members.append(source)
        return members
Exemple #23
0
class ElasticSearch(object):
    """Helper around a local Elasticsearch node storing green-bond reports."""

    def check_node_status(self):
        """Return the node's root response body, or None when unreachable."""
        response = requests.get('http://localhost:9200')
        return response.content if response.status_code == 200 else None

    def connect_es(self):
        self.es = Elasticsearch([{'host': 'localhost', 'port': 9200}])

    def add_document(self, id, data):
        self.es.index(index='green_bond', doc_type='report', id=id, body=data)

    def update_document(self, index, doc, id, data):
        # Replace semantics: drop the stored document, then re-add the payload.
        self.es.delete(index=index, doc_type=doc, id=id)
        self.add_document(id, data)

    def check_document_exists(self, index, doc, id):
        return self.es.exists(index=index, doc_type=doc, id=id)

    def find_document(self, index, doc, id):
        return self.es.get(index=index, doc_type=doc, id=id)

    def get_all_document(self, index, doc):
        return self.es.search(index=index, doc_type=doc, size=1000, pretty=1)
Exemple #24
0
def deleteKids(ids):
    """Delete the whg/place documents whose ids are listed in *ids*.

    Failures for individual ids are reported and skipped.
    """
    from elasticsearch import Elasticsearch
    # BUG FIX: removed a dead dict-literal expression (a leftover nested
    # query draft with no effect) and the line
    #     q = {"query": {"terms": {"": ds}}}
    # which referenced the undefined name ``ds`` and raised NameError
    # before any deletion could run.
    es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    for i in ids:
        try:
            es.delete(index='whg', doc_type='place', id=i)
        except Exception:
            # BUG FIX: the original printed the builtin ``id`` instead of
            # the failing element ``i``.
            print('failed delete for: ', i)
Exemple #25
0
def delete_post():
    """Handle "POST /delete/post": delete a book, then rebuild the Doc2Vec model.

    Renders "deleted.html" with the deleted book's details.
    """
    title = get_title('削除完了')
    isbn10 = request.form['isbn10']  # ISBN-10 code of the book to delete

    es = Elasticsearch('elasticsearch')
    book_title = es.get_source(index='book', id=isbn10)['title']  # title of the book being deleted
    es.delete(index='book', id=isbn10)  # remove the book from the 'book' index
    logger.debug('書籍の削除に成功しました (ISBN-10: {})'.format(isbn10))

    es.indices.refresh(
        index='book')  # refresh the 'book' index so the deleted book cannot leak into the Doc2Vec retraining below

    es.close()

    # Rebuild the Doc2Vec model on every delete so the removed book is no
    # longer offered as a recommendation.
    global d2v
    d2v = Doc2VecWrapper(model_path=Path('/projects/model/d2v.model'),
                         initialize=True)
    d2v.train()

    return render_template('deleted.html',
                           shishosan=config['shishosan'],
                           title=title,
                           isbn10=isbn10,
                           book_title=book_title)
def run_clean(json_dict):
    """Delete all documents matching the given project/field filters.

    json_dict must contain 'index', 'type' and 'project'; every remaining
    key/value pair is added as an exact match_phrase constraint.
    """
    global es_client
    try:
        es_client = Elasticsearch(hosts=es_hosts)
    except (ElasticsearchException, Exception) as ex:
        # NOTE(review): logger.warn is deprecated (logger.warning preferred),
        # and Exception already covers ElasticsearchException.
        logger.warn('failed to connect to %s: %s' % (es_hosts, ex))

    # 'index'/'type'/'project' are routing values, not query filters.
    es_index = json_dict.pop('index')
    es_type = json_dict.pop('type')
    search_project = json_dict.pop('project')
    search_body = {
        "query": {
            "bool": {
                "must": [
                    {
                        "terms": {
                            "project": [
                                search_project
                            ]
                        }
                    }
                ]

            }
        },
        "from": 0,
        "size": 50
    }

    # Any remaining entries become exact-phrase constraints.
    for key, value in json_dict.items():
        search_body["query"]["bool"]["must"].append({
            "match_phrase": {
                key: value
             }
        })

    # Page through the results, deleting every returned hit.
    # NOTE(review): since hits are deleted, advancing "from" by the page
    # length skips surviving documents — this likely leaves matches behind;
    # confirm whether delete_by_query / scroll was intended.
    while True:
        try:
            logger.info('searching %s:%s from %d to %d ' %
                        (es_index, search_body["query"]["bool"]["must"],
                         search_body["from"], search_body["from"] + search_body["size"]))
            es_response = es_client.search(
                index=es_index,
                body=search_body,
                _source=False
            )
        except (ElasticsearchException, Exception) as ex:
            logger.warn('failed to search %s: %s' % (search_body, ex))
            break

        result_len = len(es_response['hits']['hits'])
        logger.info("search %d results" % result_len)
        if result_len == 0:
            break
        search_body["from"] += result_len

        for hit in es_response['hits']['hits']:
            logger.info("deleting %s" % (hit["_id"]))
            es_client.delete(index=es_index, doc_type=es_type, id=hit["_id"])
 def remove_duplicate(self):
     """Drop the hard-coded duplicate profile document, if it exists."""
     client = Elasticsearch(settings.ELASTIC_SEARCH_HOST)
     try:
         client.delete(index="full-search-base",
                       doc_type="quest",
                       id="devon_bleibtrey")
     except NotFoundError:
         # Already gone — nothing to clean up.
         pass
Exemple #28
0
class Search():
    """Wrapper around one Elasticsearch index that is recreated on startup."""

    def __init__(self, index_name):
        super().__init__()
        self.logger = logging.getLogger(__name__)
        self.__es = Elasticsearch(['pulsing.jhk.org:9200'], sniff_on_start=True)

        self.__index_name = index_name
        # Start clean: drop any index left over from a previous run.
        if self.__es.indices.exists(self.__index_name):
            self.logger.debug('index exists so deleting ' + self.__index_name)
            self.__es.indices.delete(self.__index_name)

        self.__es.indices.create(self.__index_name)
        self.__es.cluster.health(wait_for_status='yellow')

    def index(self, type_name, id_value, content):
        """Index *content* under the given type and id."""
        self.logger.debug('index %s/%s : %s', type_name, id_value, content)
        self.__es.index(index=self.__index_name, doc_type=type_name, id=id_value, body=content)

    def map(self, type_name, mapping):
        """Install *mapping* for *type_name*."""
        self.logger.debug('map %s', type_name)
        self.__es.indices.put_mapping(index=self.__index_name, doc_type=type_name, body={type_name: mapping})

    def search(self, type_name, query=None):
        """Run *query* (default: match_all) against *type_name*."""
        # BUG FIX: the mutable default dict ({'match_all': {}}) is replaced
        # by a None sentinel so no single default instance can be shared.
        if query is None:
            query = {'match_all': {}}
        self.logger.debug('search %s : %s', type_name, query)
        return self.__es.search(index=self.__index_name, doc_type=type_name, body={'query': query})

    def get(self, type_name, id_value):
        """Fetch one document by id and return the raw response dict."""
        self.logger.debug('get %s/%s', type_name, id_value)
        document = self.__es.get(index=self.__index_name, doc_type=type_name, id=id_value)
        # BUG FIX: 'got document ' + document concatenated str with the
        # response dict and raised TypeError; use lazy %s formatting.
        self.logger.debug('got document %s', document)
        return document

    def delete(self, type_name, id_value):
        """Delete one document by id."""
        self.logger.debug('delete %s/%s', type_name, id_value)
        self.__es.delete(index=self.__index_name, doc_type=type_name, id=id_value)

    def optimize(self):
        """
        forcemerge allows removal of deleted documents and reducing the number of segments
        (documents are marked as tombstone [like cassandra] but not purged from the segment's
        index for performance reasons)
        """
        self.logger.debug('optimize')
        # BUG FIX: forcemerge lives on the indices client, not the root
        # Elasticsearch client.
        self.__es.indices.forcemerge(self.__index_name)

    @property
    def es(self):
        return self.__es

    def __eq__(self, other):
        return self.__es == other.__es

    def __str__(self):
        return self.__es.__str__()

    def __hash__(self):
        return self.__es.__hash__()
Exemple #29
0
class SearchIndex(metaclass=Singleton):
    """Singleton face-embedding index stored in Elasticsearch."""

    def __init__(self):
        self.es = Elasticsearch([{"host": "localhost", "port": "9200"}])

        # dense_vector mapping for 128-d embeddings plus a keyword name field.
        mapping = {
            "mappings": {
                "properties": {
                    "title_vector": {"type": "dense_vector", "dims": 128},
                    "title_name": {"type": "keyword"},
                }
            }
        }

        try:
            self.es.indices.create(index="final_face_recognition", body=mapping)
        except RequestError:
            print("Index already exists!!")

    def push(self, emb, index, image_name=None):
        """Store one embedding under document id *index*."""
        document = {"title_vector": emb, "title_name": image_name}
        self.es.create("final_face_recognition", id=index, body=document)

    def delete(self, index):
        """Remove the embedding stored under document id *index*."""
        self.es.delete(index="final_face_recognition", id=index)

    def search(self, emb, size):
        """
        size : # nearest neighbours
        """
        body = {
            "size": size,  # foe ex 5 nearest neighbours
            "query": {
                "script_score": {
                    "query": {"match_all": {}},
                    "script": {
                        "source":
                        "cosineSimilarity(params.queryVector, 'title_vector')+1",
                        # "source": "1 / (1 + l2norm(params.queryVector, 'title_vector'))", #euclidean distance
                        "params": {"queryVector": list(emb)},
                    },
                }
            },
        }
        return self.es.search(index="final_face_recognition", body=body)
class XiaomiElasticSearchPipeline(object):
    """Scrapy pipeline that mirrors crawled items into Elasticsearch.

    ``process_item`` buffers items for bulk indexing and also deletes and
    re-creates each item's individual document (keyed by ``appid``).
    """

    # Kept for backward compatibility with any external reference;
    # __init__ rebinds a per-instance list so pipelines don't share it.
    items_buffer = []

    def __init__(self):
        self.settings = get_project_settings()
        uri = "{}:{}".format(self.settings['ELASTICSEARCH_SERVER'], self.settings['ELASTICSEARCH_PORT'])
        self.es = Elasticsearch([uri])
        self.items_buffer = []

    def index_item(self, item):
        """Queue one item as a bulk index action; flush when the buffer
        reaches ELASTICSEARCH_BUFFER_LENGTH (default 500)."""
        index_name = self.settings['ELASTICSEARCH_INDEX']
        index_suffix_format = self.settings.get('ELASTICSEARCH_INDEX_DATE_FORMAT', None)

        if index_suffix_format:
            index_name += "-" + datetime.strftime(datetime.now(), index_suffix_format)

        index_action = {
            '_index': index_name,
            '_type': self.settings['ELASTICSEARCH_TYPE'],
            '_source': dict(item)
        }

        self.items_buffer.append(index_action)

        if len(self.items_buffer) == self.settings.get('ELASTICSEARCH_BUFFER_LENGTH', 500):
            self.send_items()
            self.items_buffer = []

    def send_items(self):
        """Flush the buffered actions with one bulk request."""
        helpers.bulk(self.es, self.items_buffer)

    def process_item(self, item, spider):
        """Index ``item``; recurse into generators/lists of items.

        Bug fixes: ``types.ListType`` is Python-2-only (replaced with
        ``list``), and the container branch now returns instead of falling
        through to ``item['appid']`` (a TypeError on a list).
        """
        if isinstance(item, (types.GeneratorType, list)):
            for each in item:
                self.process_item(each, spider)
            return item
        self.index_item(item)
        logging.info("Remove old values in Elasticsearch if exit")
        self.es.delete(self.settings['ELASTICSEARCH_INDEX'], self.settings['ELASTICSEARCH_TYPE'], id=item['appid'], ignore=[400, 404])
        self.es.index(self.settings['ELASTICSEARCH_INDEX'], self.settings['ELASTICSEARCH_TYPE'], dict(item), id=item['appid'], op_type='create', )
        return item

    def close_spider(self, spider):
        """Flush any leftover buffered actions when the spider closes.

        Bug fix: the original tested ``if not self.items_buffer`` and so
        only "flushed" when the buffer was EMPTY, dropping the last batch.
        """
        if self.items_buffer:
            self.send_items()
            self.items_buffer = []
Exemple #31
0
class IndexCreator(object):
    """Creates the "xiniudata" index and registers its doc-type mappings."""

    nameseg = NameSegmenter()
    identifier = Identifier()
    stopwords = stopword.get_standard_stopwords()
    seg = Segmenter()

    def __init__(self, es=None):
        """Use the given client, or build one from tsbconfig."""
        global logger_index
        self.logger = logger_index
        if not es:
            host, port = tsbconfig.get_es_config()
            self.logger.info(','.join([host, port]))
            self.es = Elasticsearch([{'host': host, 'port': port}])
        else:
            self.es = es
        self.logger.info(self.es)
        self.logger.info('Index Creator inited')

    def __check(self):
        """Ensure the index exists and every mapping is registered."""
        if not self.es.indices.exists(["xiniudata"]):
            self.logger.info('Creating index xiniudata')
            self.es.indices.create("xiniudata")
            self.logger.info('Created')
        self.es.indices.put_mapping("company", mappings.get_company_mapping(),
                                    "xiniudata")
        self.logger.info('Company mapping created')
        self.es.indices.put_mapping("completion",
                                    mappings.get_completion_mapping(),
                                    "xiniudata")
        self.logger.info('Completion mapping created')
        self.es.indices.put_mapping("dealCompletion",
                                    mappings.get_deal_completion_mapping(),
                                    "xiniudata")
        self.logger.info('Deal Completion mapping created')
        self.es.indices.put_mapping("deal", mappings.get_deal_mapping(),
                                    "xiniudata")
        self.logger.info('Deal mapping created')
        self.es.indices.put_mapping("interior",
                                    mappings.get_company_interior_mapping(),
                                    "xiniudata")
        self.logger.info('Interior mapping created')
        self.es.indices.put_mapping("digital_token",
                                    mappings.get_digital_token_mapping(),
                                    "xiniudata")
        self.logger.info('Token mapping created')

    def delete_index(self, doc_type, id):
        """Delete one document from "xiniudata"; missing docs are ignored.

        Bug fix: the Python-2-only ``except NotFoundError, efe`` syntax is
        replaced with the version-agnostic form (the bound name was unused).
        """
        try:
            self.es.delete('xiniudata', doc_type, id)
        except NotFoundError:
            pass
Exemple #32
0
def delete(_id):
    """Delete the document with id ``_id`` from the "pessoa" index.

    Returns a JSON payload reporting success, or a generic execution
    error. The original best-effort behaviour is kept, but the bare
    ``except:`` is narrowed to ``except Exception`` so that
    SystemExit/KeyboardInterrupt still propagate.
    """
    try:
        from elasticsearch import Elasticsearch
        es = Elasticsearch()
        es.delete(index="pessoa", doc_type="_doc", id=str(_id))
        return jsonify({"resultado": "A entrada com id: " + str(_id) + ", foi efetuada"})
    except Exception:
        return jsonify({"resultado": "Erro de execução"})
Exemple #33
0
def get_whois_ip(ip,refresh=None):
	"""Look up rWHOIS data for ``ip``, cached in the 'rwhois2' ES index
	(doc id = the ip with dots replaced by '0').

	refresh: any non-None value forces a re-fetch of a cached entry.
	Returns (flask-jsonified results, HTTP status). Python 2 code.
	"""
	es = Elasticsearch()
	print repr(ip)
	id_num = str(ip).replace(".","0")
	does_exist = es.exists(index='rwhois2', doc_type='ipaddr', id = id_num)
	print does_exist
	if does_exist is True and refresh is None:
		# Cache hit: serve the stored record unchanged.
		status = 200
		print "Found it!"
		get_record = es.get(index='rwhois2',doc_type='ipaddr', id = id_num)
		results = jsonify(get_record['_source'])
	elif does_exist is True and refresh is not None:
                status = 200
                print "Forcing refresh!"
                es.delete(index='rwhois2', doc_type='ipaddr', id = id_num)
                # NOTE(review): this branch routes lookups through a local
                # SOCKS5 proxy; the first-fetch branch below does not -
                # confirm that asymmetry is intended.
                try:
                        ipwhois.net.socks.setdefaultproxy(ipwhois.net.socks.SOCKS5,"localhost")
			obj = IPWhois(ip)
                        try:
                                results_raw = obj.lookup_whois(get_referral=True,inc_nir=True)
                        except:
                                results_raw = obj.lookup_whois()

                        status = 200
                        results = jsonify(results_raw)
                        es.index(index='rwhois2', doc_type='ipaddr', id=id_num, body=results_raw)

                except Exception as e:
                        print e
                        results = jsonify({'status': "not_found"})
                        status = 404


	
	else:
		# First fetch: no proxy, no inc_nir; cache on success.
		try:
			obj = IPWhois(ip)
			try:
				results_raw = obj.lookup_whois(get_referral=True)
			except:
				results_raw = obj.lookup_whois()
			status = 200
			results = jsonify(results_raw)
			id_num = str(ip).replace(".","0")
                        print results
                        try:
				es.index(index='rwhois2', doc_type='ipaddr', id=id_num, body=results_raw)
			except Exception as e:
				print "Elasticsearch encountered a problem ", e
                                pass
		except Exception as e:
                        #print results_raw
        	        print e
                	results_raw = jsonify({'status': "not_found"})
	                status = 404
        	        results = jsonify({'status': "not_found"})
        return results,status
Exemple #34
0
 def index_remove(self):
     """Drop this record's document from its per-basename "log" index and
     mark the model as no longer indexed (delete failures only logged)."""
     client = Elasticsearch(settings.ELASTICSEARCH, timeout=60)
     index_name = self.get_basename().lower()
     try:
         client.delete(index=index_name, doc_type="log", id=self.id)
     except Exception as exc:
         logger.debug("delete index: %s" % exc)
     self.is_indexed = False
     self.save()
Exemple #35
0
 def index_remove(self):
     """Remove this record's document from Elasticsearch and clear the
     indexed flag (the document id is the model's primary key)."""
     es = Elasticsearch(settings.ELASTICSEARCH, timeout=60)
     name = self.get_basename()
     try:
         # Index name is the lowercased basename; failures just log.
         es.delete(index=name.lower(), doc_type="log", id=self.id)
     except Exception as e:
         logger.debug("delete index: %s" % e)
     self.is_indexed = False
     self.save()
Exemple #36
0
def deleteDocs(ids):
    """Best-effort delete of the given place ids from the 'whg' index.

    Failures are reported and skipped so one bad id does not abort the run.
    """
    from elasticsearch import Elasticsearch
    es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    for i in ids:
        try:
            es.delete(index='whg', doc_type='place', id=i)
        except Exception:
            # Bug fix: the original printed the *builtin* ``id`` instead of
            # the failing document id ``i``; also narrowed the bare except.
            print('failed delete for: ', i)
Exemple #37
0
 def remove_duplicate_plebs_search(self):
     """Purge every Pleb's profile document from the full-search index,
     silently skipping profiles that were never indexed."""
     client = Elasticsearch(settings.ELASTIC_SEARCH_HOST)
     for profile in Pleb.nodes.all():
         try:
             client.delete(index='full-search-base',
                           doc_type='profile',
                           id=profile.username)
         except NotFoundError:
             pass
Exemple #38
0
def search(keys):
    """Query the AWS-hosted "event" index for each label in ``keys``.

    Events whose ``eventTime`` is missing, unparsable, or in the past are
    deleted from the index; the eventIds of surviving hits are returned.
    """
    service = 'es'
    region = 'us-west-2'
    credentials = boto3.Session().get_credentials()
    awsauth = AWS4Auth(credentials.access_key,
                       credentials.secret_key,
                       region,
                       service,
                       session_token=credentials.token)

    es = Elasticsearch(hosts=[{
        'host': HOST,
        'port': 443
    }],
                       http_auth=awsauth,
                       use_ssl=True,
                       verify_certs=True,
                       connection_class=RequestsHttpConnection)

    results = []

    for key in keys:
        # Bug fix: the response no longer reuses the name of its own loop
        # variable (the original did ``for r in r[...]``).
        resp = es.search(index="event",
                         body={
                             "from": 0,
                             "size": 10,
                             "query": {
                                 "match": {
                                     "labels": key
                                 }
                             }
                         })
        for hit in resp["hits"]["hits"]:
            print('find', hit)
            es_id = hit['_id']
            event_time = hit['_source'].get('eventTime')

            # Decide expiry first and keep es.delete OUT of the try:
            # originally a delete failure was caught by the handler, which
            # then attempted the same delete a second time.
            expired = True
            if event_time:
                try:
                    expired = (datetime.datetime.strptime(
                        event_time,
                        '%Y-%m-%d %H:%M:%S.%f') < datetime.datetime.now())
                except (ValueError, TypeError):
                    expired = True  # malformed timestamp counts as expired
            if expired:
                es.delete(index="event", id=es_id)
                print('delete', hit)
                continue

            results.append(hit['_source']['eventId'])
            print('put', hit)

    return results
Exemple #39
0
def delete_course_from_index(target):
    """Remove ``target``'s course document for every configured language,
    ignoring documents that are already gone (TransportError)."""
    client = Elasticsearch()
    for language in settings.LANGUAGE_CODES:
        try:
            client.delete(index='courses',
                          doc_type='course_%s' % language,
                          id=target.id)
        except TransportError:
            pass
def deleteQuestion(request, id):
    """Delete the question document with the given id, then render the
    deletion-confirmation page."""
    es = ES('http://127.0.0.1:9200/')
    es.delete(index="test-rule1", doc_type="book", id=id)

    context = RequestContext(request)
    template = loader.get_template('polls/qdelete.html')
    return HttpResponse(template.render(context))
Exemple #41
0
class ESClient:
    """Helper around Elasticsearch for the metric_metadata / enums indices.

    Documents are keyed by "<tenant_id>:<metric_name>" and routed by
    tenant id.
    """

    def __init__(self, es_params):
        self.es = Elasticsearch(es_params)

    def get_metric_metadata(self, metric_name, tenant_id):
        """
        Get document from index metric_metadata for a given metric name
        and tenant id; on 404 return the error info instead of raising.
        """
        document_id = self.get_document_id(tenant_id=tenant_id, metric_name=metric_name)
        try:
            return self.es.get(index='metric_metadata', doc_type='metrics', id=document_id, routing=tenant_id)
        except NotFoundError as e:
            return e.info

    def get_enums_data(self, metric_name, tenant_id):
        """
        Get document from index enums for a given metric name and tenant
        id; on 404 return the error info instead of raising.
        """
        document_id = self.get_document_id(tenant_id=tenant_id, metric_name=metric_name)

        try:
            return self.es.get(index='enums', doc_type='metrics', id=document_id, routing=tenant_id)
        except NotFoundError as e:
            return e.info

    def delete_metric_metadata(self, metric_name, tenant_id):
        """
        Delete the metric_metadata document for the metric/tenant pair.

        Compatibility fix: print is used in function-call form so the
        module also parses under Python 3.
        """
        document_id = self.get_document_id(tenant_id=tenant_id, metric_name=metric_name)
        self.es.delete(index='metric_metadata', doc_type='metrics', id=document_id, routing=tenant_id)
        print('Deleted from index metric_metadata for _id: [%s] routing: [%s]' % (document_id, tenant_id))

    def delete_enums_data(self, metric_name, tenant_id):
        """
        Delete the enums document for the metric/tenant pair.
        """
        document_id = self.get_document_id(tenant_id=tenant_id, metric_name=metric_name)
        self.es.delete(index='enums', doc_type='metrics', id=document_id, routing=tenant_id)
        print('Deleted from index enums for _id: [%s] routing: [%s]' % (document_id, tenant_id))

    def get_document_id(self, tenant_id, metric_name):
        """
        Construct the elastic search _id: "<tenant_id>:<metric_name>".
        """
        return tenant_id + ':' + metric_name
class ElasticSearchDb(PersistenceBase):
    """PersistenceBase implementation backed by one Elasticsearch index
    (one doc_type per logical table, docs keyed by a generated uuid)."""

    def __init__(self):
        # Bug fix: the original constructed a throwaway PersistenceBase()
        # and initialised THAT object instead of initialising self.
        PersistenceBase.__init__(self)
        self.session = Elasticsearch()
        self.database = Config().elasticsearchindex
        if not self.session.indices.exists(index=self.database):
            self.session.indices.create(index=self.database)

    def selectalltables(self):
        """List the doc types (logical tables) present in the index."""
        tables = []
        res = self.session.indices.get_mapping(index=self.database)
        for map in res[self.database]['mappings']:
            tables.append(map)
        return tables

    def selectallcolumns(self, tablename):
        """List the mapped property names of one doc type."""
        columns = []
        res = self.session.indices.get_mapping(index=self.database)
        for column in res[self.database]['mappings'][tablename]['properties']:
            columns.append(column)
        return columns

    def selectall(self, tablename):
        """Fetch documents of one doc type (raw ES response)."""
        res = self.session.get(index=self.database, doc_type=tablename)
        return res

    def selectone(self, tablename, id):
        """Fetch a single document by its uuid.

        NOTE(review): ES ``get`` does not evaluate a query body - this
        likely needs to be ``search``; kept as-is pending confirmation.
        """
        queryfilter = {'uuid': id}
        res = self.session.get(index=self.database, doc_type=tablename, body={'query': queryfilter})
        return res

    def insert(self, obj, tablename):
        """Stamp updatedAt/uuid on ``obj`` and index it."""
        obj.updatedAt = datetime.isoformat(datetime.now())
        obj.uuid = str(self.getuuid())
        serialized_obj = self.getallvaluesfromobject(obj)
        self.session.index(index=self.database, doc_type=tablename, body=serialized_obj, id=obj.uuid)

    def update(self, obj, tablename):
        """Refresh updatedAt and persist ``obj``'s fields over the doc.

        Bug fix: the ES update API requires a body; the original sent
        none, which fails server-side. Mirrors insert()'s serialisation.
        """
        obj.updatedAt = datetime.isoformat(datetime.now())
        partial = json.loads(self.getallvaluesfromobject(obj))
        self.session.update(index=self.database, doc_type=tablename, id=obj.uuid,
                            body={'doc': partial})

    def delete(self, obj, tablename):
        #todo: add to a table that manage deleted items (just to know if the obj was deleted and we don't have to add again)
        self.session.delete(index=self.database, doc_type=tablename, id=obj.uuid)

    def getallvaluesfromobject(self, obj):
        """Serialise ``obj``'s attributes to a JSON string."""
        print(obj)
        ret = json.dumps(obj, default=lambda o: o.__dict__)
        return ret
Exemple #43
0
    def get(self, request, *args, **kwargs):
        """Handle GET: delete note ``args[0]`` from the "notes" index and
        render the result page with the delete outcome."""
        note_id = args[0]
        client = Elasticsearch("s3.zserg.net:9200")
        response = client.delete(index="notes", doc_type='note', id=note_id)
        return render(request, self.template_name,
                      {'result': response['found'], 'note_id': note_id})
class ElasticSearchProvider(object):
    """
    Thin full-text-search SDK over Elasticsearch.
    """

    def __init__(self, hosts=None):
        self._es = Elasticsearch(hosts)

    def insert(self, index, doc_type, doc):
        """
        :arg index: the ES _index
        :arg doc_type: the ES _type
        :arg doc: document to store; ``doc['id']`` becomes the ES _id
        :returns: True when a new document was created
        """
        res = self._es.index(index, doc_type, doc, doc['id'])
        return res['created']

    def update(self, index, doc_type, doc):
        """
        :arg index: the ES _index
        :arg doc_type: the ES _type
        :arg doc: document to overwrite (full reindex under ``doc['id']``)
        :returns: always True
        """
        self._es.index(index, doc_type, doc, doc['id'])
        return True


    def delete(self, index, doc_type, doc):
        """
        :arg index: the ES _index
        :arg doc_type: the ES _type
        :arg doc: document whose ``doc['id']`` should be deleted
        :returns: True when the document existed
        """
        res = self._es.delete(index, doc_type, doc['id'])
        return res['found']
Exemple #45
0
class ObjectManager(object):
    """CRUD facade that maps model objects onto one ES index/doc_type via
    an ObjectMapper."""

    def __init__(self, index, doc_type, model_class):
        super(ObjectManager, self).__init__()
        self.index = index
        self.doc_type = doc_type
        self.model_class = model_class
        self.es = Elasticsearch()
        self.mapper = ObjectMapper()

    def find_one(self, pk):
        """Load the document with id ``pk`` as a model instance."""
        raw = self.es.get(index=self.index, doc_type=self.doc_type, id=pk)
        return self.mapper.from_dict_to_model(raw, self.model_class)

    def save(self, model):
        """Index ``model`` under its identity; True when newly created."""
        payload = self.mapper.from_model_to_dict(model)
        outcome = self.es.index(index=self.index, doc_type=self.doc_type,
                                id=model.get_identity(), body=payload)
        return outcome['created']

    def find_all(self):
        """Return every stored document as a model instance."""
        outcome = self.es.search(index=self.index, doc_type=self.doc_type,
                                 body={"query": {"match_all": {}}})
        return [self.mapper.from_dict_to_model(hit, self.model_class)
                for hit in outcome['hits']['hits']]

    def update(self, model):
        """Partial-update the stored document from ``model``'s fields."""
        payload = self.mapper.from_model_to_dict(model)
        return self.es.update(index=self.index, doc_type=self.doc_type,
                              id=model.pk, body={"doc": payload})

    def delete(self, pk):
        """Delete the document with id ``pk``; returns the ES response."""
        return self.es.delete(index=self.index, doc_type=self.doc_type, id=pk)
Exemple #46
0
class ESIndex:
    """Elasticsearch wrapper with per-call override of index/doc_type.

    NOTE(review): ``__init__`` assigns ``self.index``, which shadows the
    ``index()`` method on instances. Renaming either would change the
    public interface, so both are left as-is.
    """

    def __init__(self, hosts, index="", doc_type=""):
        self.es = Elasticsearch(hosts)
        self.index = index
        self.doc_type = doc_type

    def index(self, doc_id, body, index="", doc_type=""):
        """Index ``body`` under ``doc_id`` (defaults from the instance)."""
        index_ = self.index if index == "" else index
        doc_type_ = self.doc_type if doc_type == "" else doc_type
        return self.es.index(index=index_, doc_type=doc_type_, body=body, id=doc_id)

    def delete(self, doc_id, index="", doc_type=""):
        """Delete the document ``doc_id`` (defaults from the instance)."""
        index_ = self.index if index == "" else index
        doc_type_ = self.doc_type if doc_type == "" else doc_type
        return self.es.delete(index=index_, doc_type=doc_type_, id=doc_id)

    def bulk(self, docs, index="", doc_type="", op_type='index'):
        '''
        Build and run a parallel bulk of index/delete actions.

        bulk sample:
        {"_op_type":"index", _index" : "test", "_type" : "type1", "_id" : "1" , "_source":{"field1":"value1", "field2":"value2"}}
        { "_op_type":"delete" ,  "_index" : "test", "_type" : "type1", "_id" : "2" } 

        '''
        index_ = self.index if index == "" else index
        doc_type_ = self.doc_type if doc_type == "" else doc_type

        allow_op = ['index', 'delete']
        if op_type not in allow_op:
            raise exceptions.RequestError(400, '{"msg":"op_type is not allowed, you can use index or delete"}')

        actions = []
        for doc in docs:
            action = {}
            action["_index"] = index_
            action["_type"] = doc_type_
            action["_id"] = doc["_id"]
            if op_type == 'index':
                # _id lives in the action, not the source document.
                del doc["_id"]
                action["_source"] = doc
            action["_op_type"] = op_type
            actions.append(action)

        return helpers.parallel_bulk(self.es, actions)

    def getDoc(self, doc_id, index="", doc_type=""):
        """Fetch one document by id (defaults from the instance)."""
        index_ = self.index if index == "" else index
        doc_type_ = self.doc_type if doc_type == "" else doc_type

        return self.es.get(index=index_, doc_type=doc_type_, id=doc_id)

    def putMapping(self, body, index="", doc_type=""):
        """Register a mapping for the doc type."""
        index_ = self.index if index == "" else index
        doc_type_ = self.doc_type if doc_type == "" else doc_type
        return self.es.indices.put_mapping(index=index_, doc_type=doc_type_, body=body)

    def create(self, body=None, index="", timeout=30):
        """Create an index.

        Bug fix: ``body`` previously defaulted to a shared mutable dict.
        NOTE(review): ``timeout`` is accepted but never forwarded; kept to
        preserve the original behaviour.
        """
        index_ = self.index if index == "" else index
        return self.es.indices.create(index_, body=body if body is not None else {})
Exemple #47
0
class ProjectDB(BaseProjectDB):
    """pyspider project store backed by an Elasticsearch index; each
    project is one document keyed by its name."""

    __type__ = 'project'

    def __init__(self, hosts, index='pyspider'):
        self.index = index
        self.es = Elasticsearch(hosts=hosts)

        # ignore=400: the index may already exist.
        self.es.indices.create(index=self.index, ignore=400)
        if not self.es.indices.get_mapping(index=self.index, doc_type=self.__type__):
            self.es.indices.put_mapping(index=self.index, doc_type=self.__type__, body={
                "_all": {"enabled": False},
                "properties": {
                    "updatetime": {"type": "double"}
                }
            })

    def insert(self, name, obj=None):
        """Create/overwrite the project ``name`` with sane defaults.

        Bug fix: ``obj`` previously defaulted to a shared mutable dict.
        """
        obj = dict(obj or {})
        obj['name'] = name
        obj['updatetime'] = time.time()

        obj.setdefault('group', '')
        obj.setdefault('status', 'TODO')
        obj.setdefault('script', '')
        obj.setdefault('comments', '')
        obj.setdefault('rate', 0)
        obj.setdefault('burst', 0)

        return self.es.index(index=self.index, doc_type=self.__type__, body=obj, id=name,
                             refresh=True)

    def update(self, name, obj=None, **kwargs):
        """Partial-update the project ``name`` (missing docs ignored).

        Bug fix: ``obj`` previously defaulted to a shared mutable dict.
        """
        obj = dict(obj or {})
        obj.update(kwargs)
        obj['updatetime'] = time.time()
        return self.es.update(index=self.index, doc_type=self.__type__,
                              body={'doc': obj}, id=name, refresh=True, ignore=404)

    def get_all(self, fields=None):
        """Yield every project document (optionally restricted fields)."""
        for record in elasticsearch.helpers.scan(self.es, index=self.index, doc_type=self.__type__,
                                                 query={'query': {"match_all": {}}},
                                                 _source_include=fields or []):
            yield record['_source']

    def get(self, name, fields=None):
        """Return one project's source dict, or None when missing."""
        ret = self.es.get(index=self.index, doc_type=self.__type__, id=name,
                          _source_include=fields or [], ignore=404)
        return ret.get('_source', None)

    def check_update(self, timestamp, fields=None):
        """Yield projects whose updatetime is >= ``timestamp``."""
        for record in elasticsearch.helpers.scan(self.es, index=self.index, doc_type=self.__type__,
                                                 query={'query': {"range": {
                                                     "updatetime": {"gte": timestamp}
                                                 }}}, _source_include=fields or []):
            yield record['_source']

    def drop(self, name):
        """Delete the project document and refresh immediately."""
        return self.es.delete(index=self.index, doc_type=self.__type__, id=name, refresh=True)
class DataCatalogModel(object):

    """
    Base for the application's model classes.

    Holds the shared DCConfig, a per-class logger, and the Elasticsearch
    connection built from the configured host and port.
    """

    def __init__(self):
        self._config = DCConfig()
        self._log = logging.getLogger(type(self).__name__)
        elastic_address = '{}:{}'.format(self._config.elastic.elastic_hostname,
                                         self._config.elastic.elastic_port)
        self._elastic_search = Elasticsearch(elastic_address)

    def _get_entry(self, entry_id):
        """
        Shortcut to the ElasticSearch.get function; the configured
        index/doc_type are filled in automatically.
        :param entry_id: elastic search id
        :raises NotFoundError: entry not found in Elastic Search
        :raises ConnectionError: problem with connecting to Elastic Search
        :return: elastic search structure
        """
        cfg = self._config.elastic
        return self._elastic_search.get(index=cfg.elastic_index,
                                        doc_type=cfg.elastic_metadata_type,
                                        id=entry_id)

    def _delete_entry(self, entry_id):
        """
        Shortcut to the ElasticSearch.delete function; the configured
        index/doc_type are filled in automatically and the index is
        flushed afterwards so immediate searches see the change.
        :param entry_id: elastic search id
        :raises NotFoundError: entry not found in Elastic Search
        :raises ConnectionError: problem with connecting to Elastic Search
        :rtype: None
        """
        cfg = self._config.elastic
        self._elastic_search.delete(index=cfg.elastic_index,
                                    doc_type=cfg.elastic_metadata_type,
                                    id=entry_id)
        self._elastic_search.indices.flush()
Exemple #49
0
def delete_river_config(river, name, host="localhost", port=9200):
    """
    Removes the given Elasticsearch River configuration from Elasticsearch.

    The given river type and name are formatted like "$river-$name" to form the type name.

    Arguments
    ---------
    river: string
        Name of the River plugin to use.
    name: string
        Name of this instance of the River plugin.
    host: string
        Elasticsearch host to connect to, defaults to localhost.
    port: int
        Elasticsearch port to connect to, defaults to 9200.
    """
    client = Elasticsearch(hosts=[{"host": host, "port": port}])
    river_type = "{0}-{1}".format(river, name)
    client.delete(index="_river", doc_type=river_type, id=None)
Exemple #50
0
def fixsmallblogs():
    """Delete every blog document whose indexed ``length`` is <= 50.

    Fetches up to 400 matches in one query, then deletes them one by one.
    Compatibility fix: print is used in function-call form so the module
    also parses under Python 3.
    """
    client = Elasticsearch([Util.config['eshost']])

    response = client.search(
                index="blogs",
                body={
                    "size": 400,
                      "query" : {
                        "range": {
                          "length": {
                            "lte": 50
                          }
                        }
                      }
                }
            )

    for hit in response['hits']['hits']:
        print(hit['_id'])
        client.delete('blogs', 'blog', hit['_id'])
Exemple #51
0
class Es(object):
    '''Minimal Elasticsearch wrapper bound to one index/type.'''

    def __init__(self, es_index=None, es_type=None):
        '''Connect using module HOSTS; index/type fall back to the
        module-level ES_INDEX / ES_TYPE constants.'''
        hosts = HOSTS
        if es_index:
            self.es_index = es_index
        else:
            self.es_index = ES_INDEX
        if es_type:
            self.es_type = es_type
        else:
            self.es_type = ES_TYPE
        self.es_client = Elasticsearch( hosts, cluster=ES_CLUSTER )

    def index( self , d_id, doc):
        '''Index one document.
            d_id : user id used as the ES _id
            doc : document dict
        '''
        res = self.es_client.index(index=self.es_index, doc_type=self.es_type, id=d_id, body=doc)

    def search( self, query):
        '''Run the query and print the hits (compat fix: print as a
        function so the module also parses under Python 3).'''
        response = self.es_client.search( body=query)
        print(response['hits'])

    def delete( self, doc_id ):
        '''Delete one document by id.'''
        self.es_client.delete(index=self.es_index, doc_type=self.es_type, id=doc_id )

    def upsert( self, doc_id, doc_update, doc_index, script=None ):
        '''Try to update the document; on any failure, index doc_index
        instead. Compat fix: ``except Exception, e`` was Python-2-only.'''
        try:
            self.es_client.update( index=self.es_index, doc_type=self.es_type, id=doc_id, body=doc_update,  script=script, lang='mvel' )
        except Exception as e:
            print(e)
            self.index( doc_id, doc_index )
Exemple #52
0
def app_embedded(app_id):
    """Developer-console page to enable/disable an app's embedded iframe.

    On POST: disabling removes the ApplicationEmbedded record and its
    search document; enabling upserts both. GET just renders the form.
    NOTE(review): ``app_id`` is unused - the app comes from request.app;
    presumably the route still needs the parameter.
    """
    try:
        embedded_info = ApplicationEmbedded.objects(application_id=request.app.id).get()
    except ApplicationEmbedded.DoesNotExist:
        embedded_info = None

    if request.method == 'POST':
        use_embedded = request.form.get('use') is not None
        embedded_iframe_uri = request.form.get('uri')
        campus_ids = request.form.getlist('campus')

        from elasticsearch import Elasticsearch
        es = Elasticsearch(hosts=app.config.get('ELASTICSEARCH_HOSTS'))

        if not use_embedded and embedded_info:
            # Turning the feature off: drop the search doc, then the record.
            es.delete(index='embedded_app_list', doc_type='embedded_app', id=str(embedded_info.id))
            embedded_info.delete()
            embedded_info = None
        elif use_embedded:
            if not embedded_info:
                embedded_info = ApplicationEmbedded()
                embedded_info.application_id = request.app.id

            embedded_info.iframe_uri = embedded_iframe_uri
            embedded_info.campus_ids = campus_ids
            embedded_info.save()
            # Re-fetch so a newly created record has its generated id.
            embedded_info = ApplicationEmbedded.objects(application_id=request.app.id).get()

            es.index(index='embedded_app_list', doc_type='embedded_app', id=str(embedded_info.id), body={
                'app_id': str(request.app.id),
                'name': request.app.name,
                'description': request.app.description,
                'campus_ids': campus_ids
            })

    return render_template('developer/console/app/embedded.html',
                           campuses=Campus.objects(), use_embedded=embedded_info is not None,
                           embedded_iframe_uri=embedded_info.iframe_uri if embedded_info else '',
                           use_campus_ids=embedded_info.campus_ids if embedded_info else [])
class ElasticSearchHandler:
    """Small convenience layer over an Elasticsearch connection whose
    host defaults to the ES_HOST environment variable."""

    def __init__(self, host=None):
        self.es_handle = Elasticsearch(hosts=host or os.getenv("ES_HOST"))

    def check_index(self, index):
        """True when the index exists."""
        return self.es_handle.indices.exists(index)

    def create_index(self, index):
        """Create the index unless it already exists."""
        if not self.check_index(index):
            self.es_handle.indices.create(index)

    def add_to_es(self, index, document_type, body):
        """Create a new document and return the ES response."""
        return self.es_handle.create(index=index, doc_type=document_type, body=body)

    def get_from_es(self, index, id):
        """Fetch a document by id."""
        return self.es_handle.get(index, id=id)

    def delete_by_id(self, index, document_type, id):
        """Delete a document by id."""
        self.es_handle.delete(index, document_type, id)
def main():
    """Rewrite every kibana-int dashboard, replacing ORIG_INDEX_PATTERN
    with NEW_INDEX_PATTERN inside the stored dashboard JSON."""
    es = Elasticsearch()
    dashboards = {}

    # Collect all dashboards first so they can be mutated and re-saved.
    # (The original also bound an unused ``data`` local here.)
    for hit in es.search(index="kibana-int", doc_type="dashboard", size=1000)["hits"]["hits"]:
        dashboards[hit["_id"]] = hit["_source"]

    for id_, data in dashboards.items():
        dashboard = json.loads(data["dashboard"])

        # Here the modification takes place
        if dashboard["index"]["pattern"] == ORIG_INDEX_PATTERN:
            dashboard["index"]["pattern"] = NEW_INDEX_PATTERN

        dashboards[id_]["dashboard"] = json.dumps(dashboard)

    # delete + create (rather than index) so stale fields are dropped.
    for id_, data in dashboards.items():
        es.delete(index="kibana-int", doc_type="dashboard", id=id_)
        es.create(index="kibana-int", doc_type="dashboard", id=id_, body=data)
Exemple #55
0
def update_video_index(sender, instance=None, created=False, **kwargs):
    """Signal handler keeping the "pod" ES index in sync with pod models.

    Publishable pods (not draft and with at least one encoding) are
    (re)indexed; everything else is removed from the index. Idiom fixes:
    guard clause instead of wrapping the body in an ``if``,
    ``not pod.is_draft`` instead of ``== False``, and the unused
    ``res``/``delete`` locals are dropped.
    """
    list_of_models = ('ChapterPods', 'EnrichPods', 'ContributorPods', 'Pod')
    if sender.__name__ not in list_of_models:
        return
    pod = instance if sender.__name__ == "Pod" else instance.video
    es = Elasticsearch(ES_URL)
    if not pod.is_draft and pod.encodingpods_set.all().count() > 0:
        es.index(index="pod", doc_type='pod', id=pod.id,
                 body=pod.get_json_to_index(), refresh=True)
    else:
        es.delete(index="pod", doc_type='pod', id=pod.id,
                  refresh=True, ignore=[400, 404])
Exemple #56
0
def get_whois_domain(domain,refresh=None):
        """Look up WHOIS data for ``domain``, cached in the 'domain' index.

        refresh: any non-None value forces a re-fetch of a cached entry.
        Returns (flask-jsonified results, HTTP status). Python 2 code.

        NOTE(review): in the refresh branch a lookup failure sets
        ``results_raw`` but never ``results``, so the final return would
        raise NameError on that path - confirm and fix upstream.
        """
        es = Elasticsearch()
        id_num = domain
        does_exist = es.exists(index='domain', doc_type='domain', id = domain)
        print does_exist
        if does_exist is True and refresh is None:
                # Cache hit: serve the stored record unchanged.
                status = 200
                print "Found it!"
                get_record = es.get(index='domain',doc_type='domain', id = domain)
                results = jsonify(get_record['_source'])
	elif does_exist is True and refresh is not None:
		status = 200
		print "Forcing refresh!"
		es.delete(index='domain', doc_type='domain', id = domain)
                try:
                        obj = whois.whois(domain)
                        status = 200
                        results = jsonify(obj)
                        es.index(index='domain', doc_type='domain', id=domain, body=obj)

                except Exception as e:
                        print e
                        results_raw = jsonify({'status': "not_found"})
                        status = 404
	     	
		
        else:
                try:
                        obj = whois.whois(domain)
                        status = 200
                        results = jsonify(obj)
                        es.index(index='domain', doc_type='domain', id=domain, body=obj)

                except Exception as e:
                        print e
                        results_raw = jsonify({'status': "not_found"})
                        status = 404
                        results = jsonify({'status': "not_found"})
        return results,status
def reindex_language():
    rd = redis.StrictRedis(host='localhost', port=6379, db=0)
    es = Elasticsearch([{'host': 'localhost', 'port': 9200}])

    body = {'query': {'match_all': {}}}
    result = es.search(index='throwtable', doc_type='implementation',
        body=body, size=34589)

    for impl in result['hits']['hits']:
        language = impl['_source']['language']
        if rd.hexists('rosetta-language-mapping', language):
            print impl['_id'],
            language = rd.hget('rosetta-language-mapping', language)
            print '->', language
            print 'new id = ', replace_lang_from_id(impl['_id'],
                language.decode('utf8'))
            source = impl['_source']
            source['language'] = language.decode('utf8')
            es.index(index='throwtable', doc_type='implementation',
                id=replace_lang_from_id(impl['_id'], language.decode('utf8')),
                body=source)
            es.delete(index='throwtable', doc_type='implementation',
                id=impl['_id'])
Exemple #58
0
def removealltravelblogblogs ():
    client = Elasticsearch([Util.config['eshost']])

    response = client.search(
                index="blogs",
                body={
                    "size": 400,
                     "query": {
                        "prefix": {
                          "url.rawurl": {
                            "value": "http://www.travelblog.org"
                          }
                        }
                      }
                }
            )



    for hit in response['hits']['hits']:
        print hit['_id']
        Util.deletefromazure(hit['_id'])
        client.delete('blogs','blog', hit['_id'])
Exemple #59
0
class ElasticsearchBackend(BaseBackend):
    """Schema-addressed storage backend persisting items in Elasticsearch.

    Each schema maps to one index/doc_type of the same name; document ids
    are the schema's primary-key values joined with '_n_'.
    """

    def __init__(self, conn):
        if Elasticsearch is None:
            raise ImportError("Plz. install elasticsearch library for ElasticsearchBackend.")
        self._es = Elasticsearch(**conn.options.get('elasticsearch', {}))
        super().__init__(conn)

    def _gen_es_id_for_data(self, schema_name, _data):
        # Compose the ES document id from the schema's primary-key values.
        key_names = self._conn.schema.get_primary_key(schema_name)
        return '_n_'.join([str(_data[key]) for key in key_names])

    def _gen_es_id_for_id(self, _id):
        """Normalize a caller-supplied id (string or key tuple) to an ES id."""
        if isinstance(_id, str):
            return _id
        elif isinstance(_id, (tuple, list)):
            # BUGFIX: mirror _gen_es_id_for_data by coercing each part to
            # str — a composite key with non-string parts (e.g. ints)
            # previously raised TypeError in str.join.
            return '_n_'.join(str(part) for part in _id)
        else:
            return _id

    def put_item(self, schema_name, _data, overwrite=False):
        """Store *_data*; with overwrite=False, fail if the id already exists."""
        op_type = 'create' if not overwrite else 'index'
        result = self._es.index(index=schema_name, doc_type=schema_name, id=self._gen_es_id_for_data(schema_name, _data), body=_data, op_type=op_type)
        return result.get('_version', 0) > 0

    def get_item(self, schema_name, _id):
        """Return the stored document; raise ItemNotFound when absent."""
        try:
            result = self._es.get(index=schema_name, doc_type=schema_name, id=self._gen_es_id_for_id(_id))
        except NotFoundError:
            raise ItemNotFound("Item not found for id {} in {}.".format(_id, schema_name))
        return result['_source']

    def delete_item(self, schema_name, _id):
        """Delete by id; True when the document existed."""
        result = self._es.delete(index=schema_name, doc_type=schema_name, id=self._gen_es_id_for_id(_id))
        return result['found'] is True

    def query(self, schema_name, _w, limit=10):
        # ES has no separate query/scan distinction here; delegate to scan.
        return self.scan(schema_name, _w, limit)

    def scan(self, schema_name, _w, limit=10):
        """Return up to *limit* matching documents for where-tree *_w*."""
        query = elastic_parse_wt(_w, {})
        query["size"] = limit
        result = self._es.search(index=schema_name, doc_type=schema_name, body=query)
        return [hit['_source'] for hit in result["hits"]["hits"]]

    def query_count(self, schema_name, _w):
        """Return the number of documents matching where-tree *_w*."""
        query = elastic_parse_wt(_w, {})
        result = self._es.count(index=schema_name, doc_type=schema_name, body=query)
        return result.get('count', 0)
def correct_false_warning(judge_image_dir):
    es = Elasticsearch(esport)
    if 'file_name' in request.args:
        md5_file_name = hashlib.md5(request.args['file_name']).hexdigest()
        print md5_file_name + ' for ' + request.args['file_name']
    else:
        del(es)
        return 'Error: no file name in request\n'
    judge_image_dir = 'judgeresult:' + judge_image_dir
    try:
        res = es.delete(index = judge_image_dir, doc_type = 'judgeResult', id = md5_file_name)
    except:
        del(es)
        return 'Error: file do not exist\n'
    del(es)
    return json.dumps(res['_shards'])