class ElasticStorage(BaseStorage):
    def __init__(self, config):
        if not Elasticsearch:
            raise ImportError("elasticsearch-py is required to use Elasticsearch as storage.")
        if not Search:
            raise ImportError("elasticsearch_dsl is required to use Elasticsearch as storage.")
        self.name = 'elasticsearch'
        self.storage = Elasticsearch(**config)

    def keys(self, pattern="*"):
        return self.storage.keys(pattern)

    def set_val(self, key, val):
        body = {
            'key': key,
            'val': ','.join(map(str, val[0])),
            'extra': str(val[1])
        }
        self.storage.index(index='sift', doc_type='sift', body=body)

    def get_val(self, key):
        s = Search(using=self.storage, index='sift')
        return s.filter('term', key=key).execute().hits.hits

    def append_val(self, key, val):
        self.set_val(key, val)

    def get_list(self, key):
        return self.get_val(key)
def do_es(self, results, t):
    if HAVE_ELASTICSEARCH:
        try:
            es = Elasticsearch(
                hosts=[{
                    'host': reporting_conf.elasticsearchdb.host,
                    'port': reporting_conf.elasticsearchdb.port,
                }],
                timeout=60
            )
        except Exception as e:
            raise CuckooReportError("Cannot connect to ElasticSearch DB")
            return

        index_prefix = reporting_conf.elasticsearchdb.index
        idxdate = results["info"]["started"].split(" ")[0]
        index_name = '{0}-{1}'.format(index_prefix, idxdate)

        report = {}
        report["task_id"] = results["info"]["id"]
        report["info"] = results.get("info")
        report["target"] = results.get("target")
        report["summary"] = results.get("behavior", {}).get("summary")
        report["network"] = results.get("network")
        report["virustotal"] = results.get("virustotal")
        report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positives"],
                                                  results["virustotal"]["total"])

        # Store the report and retrieve its object id.
        es.index(index=index_name, doc_type="analysis", id=results["info"]["id"], body=report)
def createIndex():
    """This endpoint should be used to index pages of a Mouchak installation.

    FIXME:
    - Endpoint is only accessible from the index page of search service.
    - Does not support cross origin requests.
    - Better name for the function.
    """
    es = Elasticsearch()
    if not es.indices.exists(urlparse(request.form['url']).netloc):
        url = request.form['url']
        if not request.form['url'].endswith('/'):
            url = request.form['url'] + '/'
        try:
            contents = requests.get(url + "pages").json()
            for content in contents:
                es.index(index=urlparse(request.form['url']).netloc,
                         doc_type="html", body=content, id=content['id'])
            response = make_response()
            response.data = "Website indexed."
            return response
        except:
            response = make_response()
            response.status_code = 204
            return response
    else:
        response = make_response()
        response.status_code = 409
        response.data = {"reason": "Index already exists"}
        return response
def loadDatainES(filename, index, doctype, dataFileType, hostname="localhost", port=9200,
                 mappingFilePath=None, username="", password="", protocol="http"):
    try:
        print "Connecting to " + hostname + " at port:" + str(port)
        # es = Elasticsearch([{'host': hostname, 'port': port}])
        if username != "" and password != "":
            es = Elasticsearch([protocol + '://' + username + ':' + password + '@' + hostname + ":" + str(port)],
                               show_ssl_warnings=False)
        else:
            es = Elasticsearch([protocol + '://' + hostname + ":" + str(port)],
                               show_ssl_warnings=False)

        if mappingFilePath:
            with open(mappingFilePath) as m:
                mapping = m.read()
                # print "Mapping file:" + mapping
                es.indices.create(index=index, body=mapping, ignore=400)

        if dataFileType == "1":
            with open(filename) as f:
                d = json.load(f)
                for wp in d:
                    res = es.index(index=index, doc_type=doctype, body=wp, id=wp["uri"])
                    print "indexing id: " + res["_id"] + " for uri: " + wp["uri"]
        elif dataFileType == "0":
            with open(filename) as f:
                lines = f.readlines()
                for line in lines:
                    if line.strip() != "":
                        jsonurlobj = json.loads(line.strip())
                        objkey = jsonurlobj['uri']
                        res = es.index(index=index, doc_type=doctype, body=line)
                        print "indexing id: " + res["_id"] + " for uri: " + objkey
    except Exception, e:
        stderr.write('ERROR: %s\n' % str(e))
def loadNerOutputs(anndir):
    #setIds = ["Citalopram-4259d9b1-de34-43a4-85a8-41dd214e9177","Escitalopram-13bb8267-1cab-43e5-acae-55a4d957630a","Fluoxetine-5f356c1b-96bd-4ef1-960c-91cf4905e6b1"]
    #setIds = ["55816042-946d-4bec-9461-bd998628ff45","c00d1607-ac36-457b-a34b-75ad74f9cf0a","70b079e2-a1f7-4a93-8685-d60a4d7c1280","38642D80-AAA6-4196-A033-3977FF35B48A"]
    ann_ner = loadJsonFromDir(anndir)
    #print len(ann_ner)
    #idx = 1
    for ann in ann_ner:
        #if ann["setId"] in setIds:
        dict_paras = parseSingleResource(ann)
        ann_domeo = buildAnnotation(dict_paras, SAMPLE_DOMEO)
        # load all annotations
        if ann_domeo:
        # load 11 - 208
        #if ann_domeo and (int(dict_paras["fileId"]) > 10):
            es = Elasticsearch()
            es.index(index="domeo", doc_type=COLLECTION, id=dict_paras["mongo_uuid"],
                     body=json.dumps(ann_domeo))
            insert_annotation(dict_paras)
            print "[INFO] load annotations:" + str(ann["setId"])
            #print "load annotations for " + dict_paras["annotates_url"]
            #idx = idx + 1
        else:
            print "[ERROR] annotation empty"
def es_index(self, p_host, p_port, p_index, p_doctype, p_docid, p_document):
    """
    Indexes a document on an elasticsearch index according to a doctype and a docid

    {p_host} Elasticsearch server\n
    {p_port} Port of the es server\n
    {p_index} Name of the index to query\n
    {p_doctype} type of the document to index\n
    {p_docid} Id of the document to index\n
    {p_document} Document to index\n

    | es index | localhost | 9200 | myIndex | theDocType | id_457891 | {"address":{"street":"myAddress", "city":"Wow city"}} |
    """
    # Es client
    try:
        param = [{'host': p_host, 'port': int(p_port)}]
        es = Elasticsearch(param)
    except Exception:
        raise AssertionError("Connection error on %s:%i" % (p_host, int(p_port)))
    try:
        es.index(doc_type=p_doctype, id=p_docid, body=p_document, index=p_index)
    except Exception:
        raise AssertionError("Index error on %s:%i/%s for document : %s"
                             % (p_host, int(p_port), p_index, p_document))
def index(self):
    es = Elasticsearch()
    es.index(
        index=ES_FAMILIAS_INDEX,
        doc_type=ES_FAMILIAS_DOC_TYPE,
        id=self.index_key(),
        body=self.index_dict())
def addNote():
    es = Elasticsearch(['http://159.203.66.191:9200'])
    id = ""
    noteStr = ""
    if request.method == 'POST':
        id = request.form['id']
        noteStr = request.form['note']
        if len(noteStr.strip()) > 0 and len(id.strip()):
            note = {}
            note["maintag"] = id
            note["body"] = noteStr
            es.index(index="brahman", doc_type='note', id=note["maintag"], body=note)
        return redirect(url_for('index'))
    elif request.method == "GET":
        id = request.args.get("id", "")
        if (len(id) > 0):
            note = {}
            try:
                res = es.get(index="brahman", doc_type='note', id=id)
                note["title"] = id
                note["body"] = str(res['_source']['body']).strip()
            except TransportError as e:
                note["title"] = id
                note["body"] = ""
            return render_template("addNote.html", note=note)
def indexpage_off(url):
    resp = requests.get(url)
    soup = BeautifulSoup(resp.text, 'html.parser')
    soup.get_text()
    es = Elasticsearch()
    es.index(index="bc", doc_type='webpage',
             body={"timestamp": datetime.now(), "text": soup.get_text(), "url": url})
    return True
def scan_and_push_to_es(root_folder):
    """
    scan files under root_folder and push to ES server
    :param root_folder: folder to scan in unicode
    :return: None
    """
    assert root_folder[:2] == r'\\'
    machine = root_folder[2:][0:root_folder[2:].find('\\')]
    es = Elasticsearch()
    for root, dirs, files in os.walk(root_folder):
        for name in files:
            try:
                fullname = (os.path.join(root, name)).encode('utf-8')
                if check_if_already_exists(fullname):
                    path = os.path.dirname(fullname)
                    size = os.path.getsize(fullname.decode('utf-8'))    # buggy when long name
                    mtime = os.path.getmtime(fullname.decode('utf-8'))  # buggy when long name
                    doc = {
                        'machine': machine,
                        'path': path,
                        'full': fullname,
                        'name': name,
                        'size': size,
                        'mtime': str(datetime.fromtimestamp(int(mtime)))
                    }
                    sys.stdout.write('.')
                    es.index(index="file-index", doc_type='file', body=doc)
            except Exception, e:
                pass
def Send_To_ElasticSearch(partition): print("send") tweets = list(partition) print(tweets,len(tweets)) elastic_search = Elasticsearch([{'host': 'localhost', 'port': 9200}]) if(elastic_search.indices.exists(index = "location6")): print("if") if(len(tweets) != 0): for tweet in tweets: doc = { "text": tweet['text'], "location6": { "lat": tweet['coordinates'][1], "lon": tweet['coordinates'][0] }, "sentiment":tweet['sentiment'] } if(tweet['coordinates'][1] != 0 and tweet['coordinates'][0] !=0 ): elastic_search.index(index="location6", doc_type='request-info', body=doc) else: print("else") mappings = { "mappings": { "request-info": { "properties": { "text": { "type": "text" }, "location6": { "type": "geo_point" }, "sentiment": { "type": "text" } } } } } elastic_search.indices.create(index='location6', body=mappings) if(len(tweets) != 0): for tweet in tweets: doc = { "text": tweet['text'], "location6": { "lat": tweet['coordinates'][1], "lon": tweet['coordinates'][0] }, "sentiment":tweet['sentiment'] } if(tweet['coordinates'][1] != 0 and tweet['coordinates'][0] !=0 ): elastic_search.index(index="location6", doc_type='request-info', body=doc)
def start_index(index_json, eventdata): print('start_index') with open(index_json) as json_file: index_dict = json.load(json_file, object_pairs_hook=OrderedDict) try: host_ip = index_dict['host'] index_name = index_dict['index_name'] except KeyError: sys.exit('The format of input JSON is not correct.') es = Elasticsearch(hosts=host_ip, timeout=120) host_url = 'http://' + host_ip + ':9200/' if not es.indices.exists(index=index_name): init_index(host_url, index_name, index_dict) data_row = dict() title = eventdata.question_text.strip().lower() number = eventdata.number search_query = 'Number: query'.replace('query', str(number)) matches = es.search(index=index_name, q=search_query, size=1000) hits = matches['hits']['hits'] frequencies = 1 frequencies += len(hits) data_row['Title'] = title data_row['Number'] = number data_row['Difficulty'] = eventdata.difficulty data_row['Note'] = eventdata.note.strip().lower() data_row['Method'] = eventdata.method.strip().lower() data_row['LogTime'] = eventdata.pub_date.strftime('%Y-%m-%dT%H:%M:%S') es.index(index_name, 'leet', data_row) print('Load to elasticsearch completed') return frequencies
def annotate(config, documentId): if "getPosTags" in config and config["getPosTags"] == False: return esClient = Elasticsearch(config["elasticsearch"]["host"] + ":" + str(config["elasticsearch"]["port"])) corpusIndex = config["corpus"]["index"] corpusType = config["corpus"]["type"] corpusFields = config["corpus"]["text_fields"] processorIndex = config["processor"]["index"] processorType = config["processor"]["type"] document = esClient.get(index=corpusIndex, doc_type=corpusType, id = documentId, fields=corpusFields) content = "" if "fields" in document: for field in corpusFields: if field in document["fields"]: if type(document["fields"][field]) is list: for element in document["fields"][field]: content += element + ". " else: content += document["fields"][field] + ". " annotatedDocument = {} sentences = nltk.sent_tokenize(content) posTaggedSentences = [] for sentence in sentences: sentence = sentence.strip() if len(sentence) > 1: sentence = sentence.replace("-", " ") sentenceWords = nltk.word_tokenize(sentence.lower()) sentenceWords = map(lambda x: x.replace(".", ""), sentenceWords) posTags = nltk.pos_tag(sentenceWords) posTaggedSentences.append(posTags) if esClient.exists(index=processorIndex, doc_type=processorType, id=document["_id"]): annotatedDocument = esClient.get(index=processorIndex, doc_type=processorType, id=document["_id"])["_source"] annotatedDocument["pos_tagged_sentences"] = posTaggedSentences esClient.index(index=processorIndex, doc_type=processorType, id=document["_id"], body=annotatedDocument) config["logger"].info("pos-processor: Annotated document '" + document["_id"] + "'")
class ElasticsearchUtils(object):
    def __init__(self, host_ports):
        # host_ports format: [{'host': 'xxx', 'port': 9200}, {...}]
        self.host_ports = host_ports
        self.es = None

    def init_connect(self):
        self.es = Elasticsearch(self.host_ports)
        return self.es.ping()

    def get_search_result(self, index_name, type_name, query_body):
        if self.es:
            return self.es.search(index=index_name, doc_type=type_name, body=query_body)
        return

    def get_id_result(self, index_name, type_name, doc_id):
        if self.es:
            return self.es.get(index=index_name, doc_type=type_name, id=doc_id)['_source']
        return

    # A doc_id of None lets Elasticsearch generate the id automatically
    def add_index_doc(self, index_name, type_name, doc_id, doc_body):
        if doc_id:
            self.es.index(index=index_name, doc_type=type_name, id=doc_id, body=doc_body)
        else:
            self.es.index(index=index_name, doc_type=type_name, body=doc_body)

    def batch_index(self, index_name, type_name, doc_body_lines):
        self.es.bulk(index=index_name, doc_type=type_name, body=doc_body_lines)
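# A minimal usage sketch for the ElasticsearchUtils helper above; the host list,
# index/type names and the query are illustrative assumptions, not part of the original source.
utils = ElasticsearchUtils([{'host': 'localhost', 'port': 9200}])
if utils.init_connect():
    # doc_id=None lets Elasticsearch assign an id automatically
    utils.add_index_doc('my_index', 'my_type', None, {'title': 'hello'})
    hits = utils.get_search_result('my_index', 'my_type',
                                   {'query': {'match': {'title': 'hello'}}})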
def do_POST(self):
    global csvPath
    try:
        content_len = int(self.headers.getheader('content-length', 0))
        body = json.loads(self.rfile.read(content_len))
        dict = {"url": body['url'], "text": body['text']}
        es = Elasticsearch()
        es.index(index="articles", doc_type="article", body=dict)
        with open(csvPath, 'ab') as fout:
            writer = UnicodeWriter(fout, quoting=csv.QUOTE_ALL)
            writer.writerow(dict.values())
        self.send_response(200)
        self.send_header("Content-type", "application/json")
        self.end_headers()
        self.wfile.write(json.dumps({"result": True}))
    except Exception, e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        print(" Type: %s | File: %s | Line number: %s " % (exc_type, os.path.abspath(__file__), exc_tb.tb_lineno))
        print e.message
        self.send_response(500)
        self.send_header("Content-type", "application/json")
        self.end_headers()
        self.wfile.write(json.dumps({"result": False}))
def handle(self, *args, **options): # Activate a fixed locale fr translation.activate('fr') es = Elasticsearch(ES_URL) if args: if args[0]=='__ALL__': delete = es.indices.delete(index='pod', ignore=[400, 404]) #delete = es.delete_by_query(index="pod", doc_type='pod', body={"query":{"match_all":{}}}) json_data = open('pods/search_template.json') es_template = json.load(json_data) try: create = es.indices.create(index='pod', body=es_template) #ignore=[400, 404] except TransportError as e: # (400, u'IndexAlreadyExistsException[[pod] already exists]') if e.status_code==400: print "l'index Pod est existant : %s" %e.error else: print "Une erreur est survenue lors de la creation de l'index : %s-%s" %( e.status_code, e.error) from pods.views import VIDEOS for pod in VIDEOS: res = es.index(index="pod", doc_type='pod', id=pod.id, body=pod.get_json_to_index(), refresh=True) else: for pod_id in args: try: pod = Pod.objects.get(pk=int(pod_id)) except Pod.DoesNotExist: raise CommandError('Pod "%s" does not exist' % pod_id) res = es.index(index="pod", doc_type='pod', id=pod.id, body=pod.get_json_to_index(), refresh=True) else: print "******* Warning : you must give some arguments : %s *******" %self.args
class IndexTalks: index_name = 'gc' doc_type = 'talk' def __init__(self): self.ft = FetchTalks() self.es = Elasticsearch() self.es_id_seq = 0 self.confId = '' def _FetchIndividualTalk(self, url): return urllib.request.urlopen(url) def FetchTalksAndIndexThem(self, weekendUrl): self.confId, talkUrls = self.ft.FetchTalks(weekendUrl) print(str.format('confId: {}, num talk urls: {}', self.confId, len(talkUrls))) for url in talkUrls: handle = self._FetchIndividualTalk(url) self._InsertOneTalkIntoES(handle, url) def _GetNextId(self): result = self.es_id_seq self.es_id_seq = self.es_id_seq + 1 return result def _GetTitleAndAuthor(self, line, tag, tagIndex): titleString = HtmlTagParser.GetTagContents(tag, line, tagIndex) print('title string: ' + titleString) titleSegments = titleString.split('-') title = titleSegments[0].strip() author = titleSegments[1].strip() if author.find('By') == 0: author = author[3:].strip() return ( title, author ) def _GetTitleAuthorContent(self, talkHandle): title = '' author = '' titleOpenTag = '<title>' titleFound = False talkContent = '' for line in talkHandle: #strLine = str(line) strLine = line.decode() talkContent = talkContent + strLine if titleFound == False: titleIndex = strLine.find(titleOpenTag) if titleIndex != -1: title, author = self._GetTitleAndAuthor(strLine, titleOpenTag, titleIndex) titleFound = True return ( title, author, talkContent ) def _InsertOneTalkIntoES(self, talkHandle, talkUrl): title, author, talkContent = self._GetTitleAuthorContent(talkHandle) idnum = self._GetNextId() idNumStr = str(idnum) print('indexing doc num: ' + idNumStr) json_body = json.dumps({'talkSortId': idNumStr, 'title': title, 'author': author, 'confid': self.confId, 'content': talkContent, 'url': talkUrl}) self.es.index(index=self.index_name, doc_type=self.doc_type, id=idnum, body=json_body)
class StreamingIndexer(TwythonStreamer):
    def __init__(self, consumer_key=None, consumer_secret=None, access_token=None,
                 access_token_secret=None, es_host=None, es_port=None, es_index=None):
        super(StreamingIndexer, self).__init__(consumer_key, consumer_secret,
                                               access_token, access_token_secret)
        self._es = Elasticsearch([{'host': es_host, 'port': es_port}])
        self._index = es_index

    def on_success(self, tweet):
        if 'delete' in tweet:
            status_id = tweet['delete']['status']['id']
            self._es.delete(self._index, 'tweet', status_id)
            return
        if 'retweeted_status' in tweet:
            tweet = tweet['retweeted_status']
        for url in tweet['entities']['urls']:
            if 'theguardian.com' in url['expanded_url']:
                url['domain'] = 'theguardian.com'
        self._es.index(index=self._index, doc_type='tweet', id=tweet['id_str'], body=tweet)
class ScoreMerge(object):
    def __init__(self):
        self.es = Elasticsearch()
        self.count = 0
        self.category = open("/home/eunsoo/Downloads/tutorial/tutorial/category.txt", "r").read().split()

    def scoreMerge(self):
        self.es.indices.delete(index='merge', ignore=[400, 404])
        for cat in self.category:
            self.count = self.count + 1
            acc_score = 0.0
            post_count = 0
            search_results = self.es.search(index="scoretest", doc_type='categorized',
                                            body={"query": {"match": {"category": cat}}})
            if ('hits' in search_results):
                #print search_results['hits']['hits']
                for search_result in search_results['hits']['hits']:
                    acc_score = acc_score + search_result['_source']['score']
                    post_count = post_count + 1
            self.es.index(index="mergetest", doc_type="merge", id=cat,
                          body={"category": cat, "acc_score": acc_score, "post_count": post_count})
            print cat + "successfully merged"

    def displaySortedCategory(self):
        search_results = self.es.search(index="mergetest", doc_type='merge',
                                        body={"sort": {"acc_score": {"order": "desc"}}})
        print search_results
        #print "category / acc_likes / acc_comments_count / acc_score / post_count"
        for search_result in search_results['hits']['hits']:
            print ("%s/%10f/%10f" % (search_result['_source']['category'],
                                     search_result['_source']['acc_score'],
                                     search_result['_source']['post_count']))
def index_data(index_name, epub_path):
    """Index magazine data"""
    es_client = Elasticsearch()
    files = list_epub_files(epub_path)
    for _file in files:
        data = parse_epub(_file)
        magazine_title = data['title']
        articles = data['articles']
        print 'Indexing {} articles in {}'.format(len(articles), magazine_title)
        for article in articles:
            document = {
                'name': magazine_title,
                'title': article['title'],
                'author': article['author'],
                'content': article['content']
            }
            try:
                es_client.index(index=index_name, doc_type='articles', body=document)
            except ConnectionError, e:
                sys.exit(e.error)
            except RequestError, e:
                sys.exit(e.error)
def index(text, meta, options):
    es = Elasticsearch()
    document_id = meta.get('filesystem_absolute_path', '')
    try:
        # `index` here is expected to be an index name defined at module level in the original source
        result = es.index(
            index=index,
            doc_type="document",
            id=document_id,
            body=meta
        )
    except es_exceptions.TransportError as es_error:
        print(es_error)
        return
    print(result)
    document_id = result.get('_id')
    count = 1
    for page in text:
        try:
            result = es.index(
                index=index,
                doc_type="page",
                parent=document_id,
                id="{}_page{}".format(document_id, count),
                body={"content": page}
            )
        except es_exceptions.TransportError as es_error:
            print(es_error)
            continue
        print(result)
        count += 1
def loadDatainES(filename, index, doctype, dataFileType, hostname="localhost", port=9200, mappingFilePath=None):
    try:
        print "Connecting to " + hostname + " at port: " + str(port)
        es = Elasticsearch(['http://localhost:9200'])
        if mappingFilePath:
            with open(mappingFilePath) as m:
                mapping = m.read()
                es.indices.create(index=index, body=mapping, ignore=400)
        if dataFileType == 1:
            with open(filename) as f:
                data = json.load(f)
                for line in data:
                    es.index(index=index, doc_type=doctype, body=line)
                print "done indexing the json file"
        elif dataFileType == 0:
            with open(filename) as f:
                lines = f.readlines()
                for line in lines:
                    if line.strip() != "":
                        json.loads(line.strip())
                        es.index(index=index, doc_type=doctype, body=line)
                print "done indexing the given json file"
    except Exception, e:
        stderr.write('ERROR: %s\n' % str(e))
def add_aggregated_info_to_elasticsearch(aggregation_record):
    '''
    The method adds the aggregation record as JSON to elasticsearch

    Args:
        aggregation_record: tuple of (window_start_time, event_type, count)

    Returns:
        the JSON record that was indexed (with an id field added)
    '''
    from elasticsearch import Elasticsearch
    es = Elasticsearch(['localhost'])

    # Add id field
    jsonRecord = {}
    #print(aggregation_record)
    id = str(aggregation_record[0]) + str(aggregation_record[1])
    jsonRecord['id'] = id
    jsonRecord['window_start_time'] = aggregation_record[0]
    jsonRecord['event_type'] = aggregation_record[1]
    jsonRecord['count'] = aggregation_record[2]

    try:
        es.index(index="events_aggregation", doc_type="events_aggregation", id=id, body=jsonRecord)
        print(jsonRecord)
    except Exception as e:
        print("Exception in es")
        print(e)
    return jsonRecord
def create_index(index): es = Elasticsearch([ELASTICSEARCH_HOST]) files = [os.path.join(index.data_folder, x) for x in os.listdir(index.data_folder)] if es.indices.exists(index.slug): print("Deleting '%s' index" % index.slug) res = es.indices.delete(index=index.slug) print(" response: '%s'" % res) stopwords = [] for f in files: #Using experimental tika library - just a little janky response = parse('all', f, TIKA_ENDPOINT)[1] try: if response[0] == '[': #Sometimes response comes in brackets parsed = json.loads(response[1:-1]) else: #Sometimes not. parsed = json.loads(response) content, features = process_content(parsed["X-TIKA:content"], stopwords) parsed["X-TIKA:cleaned"] = content for kw, val in features.items(): parsed["has_" + re.sub(' ', '_', kw)] = val #parsed["authors"] = process_authors(parsed["X-TIKA:content"]) es.index(index=index.slug, doc_type="autonomy", body = parsed, ) except Exception as e: #Strange errors coming from new tika parser #Just move on to the next document print e pass
def handle(self, *args, **options): es = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}]) fop=open('spider/management/commands/'+str(argv[2]), 'r') inds = IndicesClient(es) mapping={ "mappings": { "product_type": { "properties": { "code": { "type" : "string" },"name": {"type" : "string"},"img": {"type" : "string"},"url": {"type" : "string"},"price_reg": {"type" : "float"},"price_discount": {"type" : "float"}}}}} if not inds.exists(index='gearbest_index'): inds.create(index='gearbest_index',body=mapping) print 'gearbest_index created' for jsonline in fop: jobj=loads(jsonline) del jobj["_type"] es.index(index="gearbest_index",doc_type='product_type', body=jobj, id=jobj['code']) disc=0 reg=0 if len(jobj['price_discount'])>0: disc = float(jobj['price_discount'][0]) if len(jobj['price_reg'])>0: reg = float(jobj['price_reg'][0]) #insert="INSERT into 'price_gb' ('price','price_disc','code','date') values ("+str(reg)+", "+str(disc)+", '"+str(jobj['code'])+"', '"+str(datetime.today())+"')" #cursor = connection.cursor() #cursor.execute(insert) add_price=Price_gb(price=reg,price_disc=disc,code=str(jobj['code']),date=datetime.date.today()) add_price.save() print 'code='+str(jobj['code'])
class ChoutiElasticsearch(object):
    def __init__(self):
        self.es = Elasticsearch()

    def create_index(self, title, url):
        self.es.index(index="chouti", doc_type="chouti-type",
                      body={"title": title, "url": url, "timestamp": datetime.now()})
class TestMemcachedConnection(ElasticTestCase): def setUp(self): try: import pylibmc except ImportError: raise SkipTest("No pylibmc.") super(TestMemcachedConnection, self).setUp() nodes = self.client.nodes.info() for node_id, node_info in nodes["nodes"].items(): if 'memcached_address' in node_info: connection_info = ADDRESS_RE.search(node_info['memcached_address']).groupdict() self.mc_client = Elasticsearch( [connection_info], connection_class=MemcachedConnection ) break else: raise SkipTest("No memcached plugin.") def test_index(self): self.mc_client.index("test_index", "test_type", {"answer": 42}, id=1) self.assertTrue(self.client.exists("test_index", doc_type="test_type", id=1)) def test_get(self): self.client.index("test_index", "test_type", {"answer": 42}, id=1) self.assertEquals({"answer": 42}, self.mc_client.get("test_index", doc_type="test_type", id=1)["_source"]) def test_unicode(self): self.mc_client.index("test_index", "test_type", {"answer": u"你好"}, id=u"你好") self.assertEquals({"answer": u"你好"}, self.mc_client.get("test_index", doc_type="test_type", id=u"你好")["_source"]) def test_missing(self): self.assertRaises(NotFoundError, self.mc_client.get, "test_index", doc_type="test_type", id=42)
class ElasticSearchProvider(object):
    """
    Full-text search SDK backed by Elasticsearch
    """

    def __init__(self, hosts=None):
        self._es = Elasticsearch(hosts)

    def insert(self, index, doc_type, doc):
        """
        :arg index: the Elasticsearch _index
        :arg doc_type: the Elasticsearch _type
        :arg doc: the document to insert
        """
        res = self._es.index(index, doc_type, doc, doc['id'])
        return res['created']

    def update(self, index, doc_type, doc):
        """
        :arg index: the Elasticsearch _index
        :arg doc_type: the Elasticsearch _type
        :arg doc: the document to update
        """
        self._es.index(index, doc_type, doc, doc['id'])
        return True

    def delete(self, index, doc_type, doc):
        """
        :arg index: the Elasticsearch _index
        :arg doc_type: the Elasticsearch _type
        :arg doc: the document to delete
        """
        res = self._es.delete(index, doc_type, doc['id'])
        return res['found']
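# A minimal usage sketch for the ElasticSearchProvider above; the host, index/type names
# and the document are illustrative assumptions, not taken from the original source.
provider = ElasticSearchProvider(hosts=['localhost:9200'])
doc = {'id': 1, 'title': 'hello world'}
provider.insert('articles', 'article', doc)    # indexes the doc under id=doc['id']
doc['title'] = 'hello again'
provider.update('articles', 'article', doc)    # re-indexing the same id overwrites it
provider.delete('articles', 'article', doc)    # removes the doc by doc['id']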
def es_index(self, p_host, p_port, p_index, p_doctype, p_docid, p_document):
    """
    === Indexes a Document by Doctype and Docid ===

    Indexes a Document on an elasticsearch index according to a doctype and a docid

    - ``p_host`` - Elasticsearch server
    - ``p_port`` - Port of the es server
    - ``p_index`` - Name of the index to query
    - ``p_doctype`` - Type of the document to index
    - ``p_docid`` - Id of the document to index
    - ``p_document`` - Document to index

    | es index | localhost | 9200 | myIndex | theDocType | id_457891 | {"address": {"street": "myAddress", "city":"Wow city"}} |
    """
    # Es client
    try:
        param = [{'host': p_host, 'port': int(p_port)}]
        es = Elasticsearch(param)
    except Exception:
        raise AssertionError("Connection error on %s:%i" % (p_host, int(p_port)))
    try:
        es.index(doc_type=p_doctype, id=p_docid, body=p_document, index=p_index)
    except Exception:
        raise AssertionError("Index error on %s:%i/%s for document : %s"
                             % (p_host, int(p_port), p_index, p_document))
def install(fileCheckKey): elasticLatest='6.2.4' #Install Elasticsearch elasticInstalled=False if os.path.isfile('/etc/elasticsearch/elasticsearch.yml'): os.popen('sudo service elasticsearch start').read() while True: elasticVersion=os.popen("curl -XGET '127.0.0.1:9200'").read() try: jsonStuff=json.loads(elasticVersion) if jsonStuff['tagline'] == "You Know, for Search": elasticVersion=jsonStuff['version']['number'] break else: print "Waiting for Elasticsearch to start..." except: print "Exception: Waiting for Elasticsearch to start..." sleep(10) if elasticLatest== elasticVersion.rstrip(): elasticInstalled=True if elasticInstalled == False: print "Installing Elasticsearch" print " Downloading Elasticsearch 6.2.4" os.popen('sudo wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-6.2.4.deb 2>&1').read() if not os.path.isfile('elasticsearch-6.2.4.deb'): sys.exit('Error downloading elasticsearch') if not hashCheck.checkHash('elasticsearch-6.2.4.deb'): sys.exit('Error downloading elasticsearch, mismatched file hashes') print " Installing Elasticsearch" os.popen('sudo dpkg -i elasticsearch-6.2.4.deb').read() print " Cleaning Up Installation Files" os.remove('elasticsearch-6.2.4.deb') os.popen('sudo update-rc.d elasticsearch defaults').read() #Change heap size to 500m (1/2 of phyical memory) shutil.move('/etc/elasticsearch/jvm.options','/etc/elasticsearch/jvm.orig') with open("/etc/elasticsearch/jvm.orig", "rt") as fileIn: with open("/etc/elasticsearch/jvm.options", "wt") as fileOut: for line in fileIn: if line.rstrip() == "-Xms2g": fileOut.write('-Xms256m\n') elif line.rstrip() == "-Xmx2g": fileOut.write('-Xmx256m\n') else: fileOut.write(line) print " Starting Elasticsearch" os.popen('sudo systemctl enable elasticsearch.service').read() os.popen('sudo service elasticsearch start').read() #Sleeping 10 seconds to begin with to give it time to startup. 
sleep(10) while True: #writeSsIndex = os.popen( # 'curl -XPUT \'127.0.0.1:9200/sweet_security?pretty\' -H \'Content-Type: application/json\' -d\' {"mappings" : {"ports" : {"properties" : {"mac" : {"type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "port" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}},"protocol" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}},"name" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "product" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "version" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "lastSeen": { "type" : "date" }}}, "devices" : { "properties" : { "hostname" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "nickname" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "ip4" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "mac" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "vendor" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "ignore" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "active" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "defaultFwAction" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "isolate" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "firstSeen" : { "type" : "date" }, "lastSeen" : { "type" : "date" }}}, "firewallProfiles" : { "properties" : { "mac" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "destination" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "action" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}}}}}\'').read() ssIndex='curl -XPUT \'127.0.0.1:9200/sweet_security?pretty\' -H \'Content-Type: application/json\' -d\'' \ ' {"mappings" : {' \ ' "ports" : {"properties" : {' \ ' "mac" : {"type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "port" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}},' \ ' "protocol" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}},' \ ' "name" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "product" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "version" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "lastSeen": { "type" : "date" }}}, ' \ ' "devices" : { "properties" : { ' \ ' "hostname" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "nickname" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "ip4" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "mac" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "vendor" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "ignore" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "active" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "defaultFwAction" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "isolate" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "firstSeen" : { "type" : "date" }, ' \ ' "lastSeen" : { "type" : "date" }}}, ' \ ' "firewallProfiles" : { "properties" : { ' \ ' "mac" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "destination" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "action" : { "type" : "text", "fields": {"keyword": {"type": 
"keyword"}}}}},' \ ' "sensors" : { "properties" : { ' \ ' "mac" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "sensorName" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "broHealth" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "logstashHealth" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "diskUsage" : { "type" : "integer"}, ' \ ' "memAvailable" : { "type" : "integer"}, ' \ ' "memPercent" : { "type" : "integer"}, ' \ ' "memConsumed" : { "type" : "integer"}, ' \ ' "firstSeen" : { "type" : "date" }, ' \ ' "lastSeen" : { "type" : "date" }}} ' \ '}}\'' writeSsIndex = os.popen(ssIndex).read() try: jsonSS = json.loads(writeSsIndex) if jsonSS['acknowledged'] == True: print " sweet_security index created" break else: print "Waiting for Elasticsearch to start, will try again in 10 seconds..." except: print "Error: Waiting for Elasticsearch to start, will try again in 10 seconds..." # Sleep 10 seconds to give ES time to get started sleep(10) while True: ssAlertIndex= 'curl -XPUT \'localhost:9200/sweet_security_alerts?pretty\' -H \'Content-Type: application/json\' -d\'{ ' \ ' "mappings" : { ' \ ' "alerts" : { "properties" : { ' \ ' "source" : { "type" : "text", "fields": {"raw": {"type": "keyword"}}}, ' \ ' "message" : { "type" : "text", "fields": {"raw": {"type": "keyword"}}}, ' \ ' "mac" : {"type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "firstSeen" : { "type" : "date" }, ' \ ' "addressedOn" : { "type" : "date" }, ' \ ' "addressed" : { "type" : "integer"}' \ '}}}}\'' writeSsAlertIndex = os.popen(ssAlertIndex).read() try: jsonSSAlert = json.loads(writeSsAlertIndex) if jsonSSAlert['acknowledged'] == True: print " sweet_security_alert index created" break else: print "Waiting for Elasticsearch to start, will try again in 10 seconds..." except: print "Error: Waiting for Elasticsearch to start, will try again in 10 seconds..." 
# Sleep 10 seconds to give ES time to get started sleep(10) try: try: from elasticsearch import Elasticsearch except: pass esService = Elasticsearch() if fileCheckKey is None: configData = {'defaultMonitor': 0, 'defaultIsolate': 0, 'defaultFW': 1, 'defaultLogRetention': 0} else: configData = {'defaultMonitor': 0, 'defaultIsolate': 0, 'defaultFW': 1, 'defaultLogRetention': 0, 'fileCheckKey': fileCheckKey} #Sleep a second to make sure index has fully created in ES sleep(1) esService.index(index='sweet_security', doc_type='configuration', body=configData) except Exception, e: print e pass while True: tardisIndex='curl -XPUT \'localhost:9200/tardis?pretty\' -H \'Content-Type: application/json\' -d\'' \ ' {"mappings" : {' \ ' "known_dnsqueries" : {"properties" : {' \ ' "mac" : {"type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "query" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}}},' \ ' "known_websites" : { "properties" : { ' \ ' "mac" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "server_name" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}}}, ' \ ' "firewallProfiles" : { "properties" : { ' \ ' "mac" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "ip" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \ ' "port" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}}}' \ '}}\'' writeTardisIndex = os.popen(tardisIndex).read() #writeTardisIndex = os.popen('curl -XPUT \'localhost:9200/tardis?pretty\' -H \'Content-Type: application/json\' -d\' {"mappings" : {"known_hosts" : {"properties" : { "mac" : { "type" : "text", "fields": {"raw": {"type": "keyword"}}},"destination" : { "type" : "text", "fields": {"raw": {"type": "keyword"}}},"port" : { "type" : "text", "fields": {"raw": {"type": "keyword"}}}}}}}\'').read() try: jsonSS = json.loads(writeTardisIndex) if jsonSS['acknowledged'] == True: print " tardis index created" break else: print "Waiting for Elasticsearch to start, will try again in 10 seconds..." except: print "Error: Waiting for Elasticsearch to start, will try again in 10 seconds..." # Sleep 10 seconds to give ES time to get started sleep(10)
class ElasticsearchWrapper: ''' Elasticsearch 呼び出し ラッパー ・Elasticsearch自身はすでにサービスとして稼働しているものとする ・pythonの「elasticsearch」モジュールは事前にインストールすること ''' def __init__(self, doc_type:str, index:str): ''' 初期化 Parameters ---------- doc_type : str ドキュメントタイプの名前 index : str インデックスの名前 ''' self.es=Elasticsearch("localhost:9200") self.doc_type=doc_type self.index=index def delete_index(self): ''' すでに存在するINDEXを削除する ''' try: self.es.indices.delete(index=self.index) except: pass def make_index(self, setting:dict, mapping:dict): ''' ElasticsearchのINDEX登録処理 Parameters ---------- setting : dict setting指定のJSONデータ mapping : dict mapping指定のJSONデータ ''' # settingsを指定してインデックスを作成 self.es.indices.create(index=self.index, body=setting) # 作成したインデックスのマッピングを指定 self.es.indices.put_mapping(index=self.index, doc_type=self.doc_type, body=mapping) def insert_one(self, doc:dict): ''' 1データを登録する Parameters ---------- doc : dict 登録するJSONデータ ''' self.es.index(index=self.index, doc_type=self.doc_type, body=doc) # id で連番を振っておくと、idでgetできるようになる # id を指定しないと、内部で任意のユニークな文字列が割り当てられる # ここでは、登録順の番号での取得はしないし、検索にはdoc内の項目を用いるので # id無しの登録でよい #self.es.index(index=self.index, doc_type=self.doc_type, body=doc, id=idx) def insert_array(self, docs:list): ''' 配列データを登録する Parameters ---------- docs : list of dict 登録するJSONデータの配列 ''' for doc in docs: self.es.index(index=self.index, doc_type=self.doc_type, body=doc) def search_and(self, items:dict, count:int = 10): ''' ディクショナリで定義された項目(名前、値)のAND条件での検索を行う Parameters ---------- items : dict 項目(名前、値)の一覧 count : int 検索結果の上限数、無指定の場合の初期値10 ''' query = { "query": { "bool" : { "must":[{"match":{key : val}} for key, val in items.items()] } } } return self.__search(query, count) def __search(self, query:dict, count:int): ''' queryで指定された検索式で、Elasticsearchを検索する Parameters ---------- query : dict Elasticsearchの検索Query count : int 検索結果の上限数 ''' results = [] params = { 'size':count } for i in self.es.search(index=self.index, doc_type=self.doc_type, body=query, params=params)["hits"]["hits"]: body = copy.deepcopy(i["_source"]) score = i['_score'] result = {'body':body, 'score':score} results.append(result) return results
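# A minimal usage sketch for the ElasticsearchWrapper above; the index/doc_type names,
# settings, mapping and documents are illustrative assumptions, not from the original source.
wrapper = ElasticsearchWrapper(doc_type="qa", index="qa_index")
wrapper.delete_index()
wrapper.make_index(
    setting={"settings": {"number_of_shards": 1, "number_of_replicas": 0}},
    mapping={"properties": {"question": {"type": "text"}, "answer": {"type": "text"}}})
wrapper.insert_array([
    {"question": "What is Elasticsearch?", "answer": "A search engine."},
    {"question": "What is an index?", "answer": "A collection of documents."},
])
# AND search over the given fields, returning at most 5 scored hits
results = wrapper.search_and({"question": "Elasticsearch"}, count=5)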
count = es.count(index=index_name)['count'] logging.info("Document Count: %s", str(count)) article_id = 1 if not (count > 0): logging.info("Documents deleted. Commencing re-indexing..") doc_list = list_blobs_with_prefix('pubmedbot', 'txt/') for doc in doc_list: file_name = doc.name download_blob('pubmedbot', file_name, file_name) pmid = file_name.split('.')[0] with open(file_name, 'r') as f: text = f.read() new_text = re.sub(r'\n\n.*et al\n', "", text) new_text = re.sub(r'\.\n\n', "\.\n", new_text) new_text = re.sub(r'Page [0-9]+ of [0-9]+\n', "", new_text) new_text = clean_unidentified_characters(new_text) new_text = re.sub(r'(\n )+', "\n", new_text).strip() new_text = re.sub(r'\n+', "\n", new_text).strip() paragraphs = re.split(r'\.( )*\n', new_text) dicts = [] for para in paragraphs: article = {'pmid': pmid, 'text': para} try: es.index(index=index_name, id=article_id, body=article) except RequestError as e: with open('error.log', 'a+') as f: f.write(e) continue article_id = article_id + 1 os.remove(file_name)
                scroll='3m', size=10000)
#res = es.search(index="not_busy_list_airtel", doc_type='class', body={
#    "query": {"bool": {"must": [{"term": {"startdate": busy_list[i]}}, {"term": {"today": today}}]}}},
#    scroll='3m', size=10000)
# Search every not_busy_list entry whose departure date falls in the peak season
for doc in res['hits']['hits']:  # each search hit is referred to as doc
    res1 = es.get(index="not_busy_list_airtel", doc_type='class', id=doc['_id'])  # fetch the ids of peak-season departure products
    doc1 = res1['_source']  # doc1 holds the contents of the peak-season departure product
    res2 = es.index(index="busydays_airtel", doc_type='class', id=doc['_id'], body=doc1)  # write the peak-season product into the busydays index under the same id
    es.delete(index="not_busy_list_airtel", doc_type='class', id=doc['_id'])  # remove the peak-season departure ids from not_busy_list
    # This completes the peak-season filter

for i in range(0, len(today_list)):  # delete all products more than six months out
    datetime_object = datetime.datetime.strptime(today_list[i], '%Y-%m-%d')
    #datetime_object = datetime.datetime.strptime(today, '%Y-%m-%d')
    five_after = monthdelta(datetime_object, 5)
    six_after = monthdelta(datetime_object, 6)
    firstday = five_after.replace(day=1)
# Connect to ES
es = Elasticsearch([{'host': 'localhost', 'port': 9200}])

# Check whether the 'person' index exists
if not es.indices.exists(index=INDEX_NAME):
    # Create the index
    es.indices.create(index=INDEX_NAME)
# Creating the index is not strictly required - if it does not exist, it is created
# automatically when the first document is inserted

# 1. Insert a person named John
print_delimiter(1)
person = {
    "firstname": "John",
}
print(es.index(index=INDEX_NAME, id=1, body=person))

# 2. Print the created person (using get with the id parameter)
print_delimiter(2)
print(es.get(index=INDEX_NAME, id=1))

# 3. Print all persons (using search)
print_delimiter(3)
print(es.search(index=INDEX_NAME, body={'query': {'match_all': {}}}))

# 4. Rename the created person to 'Jane'
print_delimiter(4)
print(es.update(index=INDEX_NAME, id=1, body={"doc": {"firstname": "Jane"}}))
print(es.get(index=INDEX_NAME, id=1))

# 5. Delete the created person
print_delimiter(5)
'''
Licensed to Elasticsearch B.V under one or more agreements.
Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
See the LICENSE file in the project root for more information
'''

from elasticsearch import Elasticsearch

es = Elasticsearch()

print("bb143628fd04070683eeeadc9406d9cc - L:11")
# tag::bb143628fd04070683eeeadc9406d9cc[]
response = es.index(index='twitter', id=1, body={
    'user': '******',
    'post_date': '2009-11-15T14:12:12',
    'message': 'trying out Elasticsearch',
})
# end::bb143628fd04070683eeeadc9406d9cc[]
print("---------------------------------------")
print(response)
print("---------------------------------------")

print("804a97ff4d0613e6568e4efb19c52021 - L:77")
print("TODO")

print("d718b63cf1b6591a1d59a0cf4fd995eb - L:121")
# tag::d718b63cf1b6591a1d59a0cf4fd995eb[]
response = es.index(
    index='twitter',
def process_item(self, item, spider):
    es = Elasticsearch(self.es_hosts)
    es.index(index=self.index_name, doc_type=self.index_type,
             body=json.dumps(dict(item), ensure_ascii=False, default=json_serial).encode("utf-8"))
    # es.index(index=self.index_name, doc_type=self.index_type, pipeline=self.ingest_pipeline,
    #          body=json.dumps(dict(item), ensure_ascii=False, default=json_serial).encode("utf-8"))
    return item
class DataStore(object): es = None tracer = None def __init__(self): self.es = Elasticsearch(ELASTICSEARCH_URI) self.tracer = logging.getLogger('elasticsearch.trace') if ELASTICSEARCH_TRACE: self.tracer.setLevel(logging.DEBUG) self.tracer.addHandler(logging.FileHandler(LOGGING['filename'])) else: self.tracer.addHandler(logging.NullHandler()) def update(self, doc_id, partial_body): self.es.update(index=ELASTICSEARCH_INDEX, id=doc_id, doc_type='sample', body={'doc': partial_body}) def setup(self): self.es.indices.create(index=ELASTICSEARCH_INDEX, ignore=400) # Ignore already exists def count(self, q=None): if q: result = self.es.count(index=ELASTICSEARCH_INDEX, doc_type='sample', q=q) else: result = self.es.count(index=ELASTICSEARCH_INDEX, doc_type='sample') return result['count'] def all(self, size=10, start=0): try: result = self.es.search(index=ELASTICSEARCH_INDEX, doc_type='sample', body={ 'query': { 'match_all': {}, }, 'from': start, 'size': size, "sort": { "timestamp": { 'order': 'desc' }, } }) except NotFoundError: pass except Exception: raise return result def lucene_search(self, query, start=0, size=15): try: body = { "sort": { "timestamp": { 'order': 'desc' }, } } result = self.es.search(index=ELASTICSEARCH_INDEX, doc_type='sample', q=query, from_=start, size=size, body=body) except NotFoundError: pass except Exception: raise return result def search(self, query): result = [] try: result = self.es.search(index=ELASTICSEARCH_INDEX, doc_type='sample', body={'query': { 'term': query }}) except NotFoundError: pass except Exception: raise return result def save(self, doc_data, doc_id): return self.merge_document('samples', 'sample', doc_data, doc_id) def get(self, doc_id): return self.es.get(index='samples', doc_type='sample', id=doc_id)['_source'] def merge_document(self, index, doc_type, doc_data, doc_id): try: self.es.indices.refresh(index) except Exception as e: raise IOError("Error updating ES index %s (%s)" % (index, e)) original_document = {} # Try to get current data if available try: original_document = self.es.get(index=index, doc_type=doc_type, id=doc_id) if 'hits' in original_document and original_document['hits'][ 'total'] != 0: original_document = original_document['_source'] else: original_document = {} except NotFoundError as e: pass # not found, proceed except Exception as e: raise e if len(original_document) == 0: return self.es.index(index, doc_type, doc_data, id=doc_id) # Merge and index merged_document = dict_merge(original_document, doc_data) return self.es.index(index=index, doc_type=doc_type, body=merged_document, id=doc_id)
# Created on 29.06.18
from datetime import datetime
import json

from elasticsearch import Elasticsearch

es = Elasticsearch('elk.hogwarts.servida.ch:9200')

with open("../../2018-05-17T10_54_28/server_stream_post_requests.json") as file:
    requests = json.load(file)

# es.indices.create(index='ismartalarm-dfrws', body={
#     'settings': {
#         'index': {
#             'number_of_shards': 1,
#             'number_of_replicas': 0
#         }
#     }
# })

i = 0
for path in requests:
    for request in requests[path]:
        print(request)
        es.index(index='ismartalarm-dfrws', doc_type='post_requests', id=i, body=request)
        i += 1
print(i)
    ELASTICSEARCH_HOST = url.hostname
    ELASTICSEARCH_AUTH = url.username + ':' + url.password
    es = Elasticsearch([{'host': ELASTICSEARCH_HOST}], http_auth=ELASTICSEARCH_AUTH)
else:
    es = Elasticsearch()

files_given = sys.argv

for file_name in files_given:
    if file_name == 'index_addresses.py':
        continue
    else:
        file_path = file_name
    print 'adding ' + file_path

    with open(file_path, 'r') as csvfile:
        print "open file"
        csv_reader = csv.DictReader(csvfile, fieldnames=[], restkey='undefined-fieldnames', delimiter=',')
        current_row = 0
        for row in csv_reader:
            current_row += 1
            if current_row == 1:
                csv_reader.fieldnames = row['undefined-fieldnames']
                continue
            address = row
            if current_row % 1000 == 0:
                print "%s addresses indexed" % current_row
            es.index(index='addresses', doc_type='address', id=current_row - 1,
                     body={'NUMBER': address[' NUMBER'], 'STREET': address[' STREET'],
                           'ADDRESS': address[' NUMBER'] + ' ' + address[' STREET'],
                           'X': address['LON'], 'Y': address[' LAT']})
    csvfile.close()
class NmapES: "This class will parse an Nmap XML file and send data to Elasticsearch" def __init__(self, input_file,es_ip,es_port,index_name): self.input_file = input_file self.tree = self.__importXML() self.root = self.tree.getroot() self.es = Elasticsearch([{'host':es_ip,'port':es_port}]) self.index_name = index_name def displayInputFileName(self): print(self.input_file) def __importXML(self): # Parse XML directly from the file path return xml.parse(self.input_file) def toES(self): "Returns a list of dictionaries (only for open ports) for each host in the report" for h in self.root.iter('host'): dict_item = {} dict_item['scanner'] = 'nmap' if h.tag == 'host': if 'endtime' in h.attrib and h.attrib['endtime']: dict_item['time'] = time.strftime('%Y/%m/%d %H:%M:%S', time.gmtime(float(h.attrib['endtime']))) for c in h: if c.tag == 'address': if c.attrib['addr'] and c.attrib['addrtype'] == 'ipv4': dict_item['ip'] = c.attrib['addr'] if c.attrib['addr'] and c.attrib['addrtype'] == 'mac': dict_item['mac'] = c.attrib['addr'] elif c.tag == 'hostnames': for names in c.getchildren(): if names.attrib['name']: dict_item['hostname'] = names.attrib['name'] elif c.tag == 'ports': for port in c.getchildren(): dict_item_ports = {} if port.tag == 'port': # print(port.tag, port.attrib) dict_item_ports['port'] = port.attrib['portid'] dict_item_ports['protocol'] = port.attrib['protocol'] for p in port.getchildren(): if p.tag == 'state': dict_item_ports['state'] = p.attrib['state'] elif p.tag == 'service': dict_item_ports['service'] = p.attrib['name'] if 'product' in p.attrib and p.attrib['product']: dict_item_ports['product_name'] = p.attrib['product'] if 'version' in p.attrib and p.attrib['version']: dict_item_ports['product_version'] = p.attrib['version'] if 'banner' in p.attrib and p.attrib['banner']: dict_item_ports['banner'] = p.attrib['banner'] elif p.tag == 'script': if p.attrib['id']: if p.attrib['output']: if 'scripts' in dict_item_ports: dict_item_ports['scripts'][p.attrib['id']] = p.attrib['output'] else: dict_item_ports['scripts'] = dict() dict_item_ports['scripts'][p.attrib['id']] = p.attrib['output'] to_upload = merge_two_dicts(dict_item, dict_item_ports) if to_upload['state'] == 'open': self.es.index(index=self.index_name, doc_type="vuln", body=json.dumps(to_upload))
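# A minimal usage sketch for the NmapES parser above; the file path, host, port and
# index name are illustrative assumptions, not part of the original source.
if __name__ == '__main__':
    nmap_es = NmapES(input_file='scan_results.xml',
                     es_ip='localhost',
                     es_port=9200,
                     index_name='nmap-scans')
    nmap_es.displayInputFileName()
    nmap_es.toES()   # pushes one document per open port to the 'nmap-scans' index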
es = Elasticsearch()
# es = Elasticsearch([{'host': 'd.es.dataapi.rea-asia.com', 'port': 9200}])

doc = {
    'author': 'Kamal',
    "searched_keyword": {
        "search_keyword": "Sunday spk",
        "matched_places": {
            "Sunday spk …": 90,
            "Sunday spkksjff": 89,
            "XXXXXXXXX": 80
        }
    },
    'timestamp': datetime.now(),
}
res = es.index(index="keyword", doc_type='search_submit', id=1, body=doc)

string_matching = {
    'searchkeyword': 'midvalley',
    'text': 'most relevant search keywords according to db',
    'timestamp': datetime.now(),
    'matched_placekeywords': {
        1: 'mid valley city',
        2: 'mid valley gardens',
        3: 'mid valley gardens'
    }
}
res = es.index(index="midvalley", doc_type='keywords-search', id=4, body=string_matching)
class ElasticsearchStorage(ExtractedInformationStorage): """ Handles remote storage of the meta data in Elasticsearch """ log = None cfg = None es = None index_current = None index_archive = None mapping = None running = False def __init__(self): self.log = logging.getLogger('elasticsearch.trace') self.log.addHandler(logging.NullHandler()) self.cfg = CrawlerConfig.get_instance() self.database = self.cfg.section("Elasticsearch") self.es = Elasticsearch( [self.database["host"]], http_auth=(str(self.database["username"]), str(self.database["secret"])), port=self.database["port"], use_ssl=self.database["use_ca_certificates"], verify_certs=self.database["use_ca_certificates"], ca_certs=self.database["ca_cert_path"], client_cert=self.database["client_cert_path"], client_key=self.database["client_key_path"]) self.index_current = self.database["index_current"] self.index_archive = self.database["index_archive"] self.mapping = self.database["mapping"] # check connection to Database and set the configuration try: # check if server is available self.es.ping() # raise logging level due to indices.exists() habit of logging a warning if an index doesn't exist. es_log = logging.getLogger('elasticsearch') es_level = es_log.getEffectiveLevel() es_log.setLevel('ERROR') # check if the necessary indices exist and create them if needed if not self.es.indices.exists(self.index_current): self.es.indices.create(index=self.index_current, ignore=[400, 404]) self.es.indices.put_mapping(index=self.index_current, body=self.mapping) if not self.es.indices.exists(self.index_archive): self.es.indices.create(index=self.index_archive, ignore=[400, 404]) self.es.indices.put_mapping(index=self.index_archive, body=self.mapping) self.running = True # restore previous logging level es_log.setLevel(es_level) except ConnectionError as error: self.running = False self.log.error( "Failed to connect to Elasticsearch, this module will be deactivated. " "Please check if the database is running and the config is correct: %s" % error) def process_item(self, item, spider): if self.running: try: version = 1 ancestor = None # search for previous version request = self.es.search( index=self.index_current, body={'query': { 'match': { 'url.keyword': item['url'] } }}) if request['hits']['total']['value'] > 0: # save old version into index_archive old_version = request['hits']['hits'][0] old_version['_source']['descendent'] = True self.es.index(index=self.index_archive, doc_type='_doc', body=old_version['_source']) version += 1 ancestor = old_version['_id'] # save new version into old id of index_current self.log.info("Saving to Elasticsearch: %s" % item['url']) extracted_info = ExtractedInformationStorage.extract_relevant_info( item) extracted_info['ancestor'] = ancestor extracted_info['version'] = version self.es.index(index=self.index_current, doc_type='_doc', id=ancestor, body=extracted_info) except ConnectionError as error: self.running = False self.log.error( "Lost connection to Elasticsearch, this module will be deactivated: %s" % error) return item
def exec_query(stmt): my_lexer=lex(module=lexer,optimize=True,debug=True) my_parser=yacc(debug=True,module=parser) val = my_parser.parse(lexer=my_lexer.clone(),debug=False,input=sql) es = Elasticsearch([{'host':"10.68.23.81","port":9201}]) val.debug() if val.get_type() == TK.TOK_QUERY: query = Query(val) print(query.dsl()) print(query._index,query._type) res = es.search(index=query._index, doc_type = query._type, body=query.dsl(), request_timeout=100) stmt_res = response_hits(res) print(json.dumps(stmt_res,indent=4)) elif val.get_type() == TK.TOK_CREATE_TABLE: stmt = Create(val) res = es.indices.create(index=stmt._index,body = stmt._options,request_timeout=100,ignore= 400) res = es.indices.put_mapping(index = stmt._index, doc_type = stmt._type, body = stmt.dsl(), request_timeout=100) print(json.dumps(res,indent=4)) elif val.get_type() == TK.TOK_INSERT_INTO: # val.debug() stmt = Insert(val) parms = stmt.metas res = es.index(index = stmt._index,doc_type = stmt._type, body = stmt.dsl(),**parms) print(json.dumps(res,indent=4)) elif val.get_type() == TK.TOK_BULK_INTO: # val.debug() stmt = Bulk(val) res = es.bulk(index = stmt._index,doc_type = stmt._type, body = stmt.dsl()) print(json.dumps(res,indent=4)) elif val.get_type() == TK.TOK_UPDATE: val.debug() stmt = Update(val) print(json.dumps(stmt.dsl(),indent=4)) res = es.update(index = stmt._index, doc_type = stmt._type, body = stmt.dsl(), **stmt.conditions) print(json.dumps(res,indent=4)) elif val.get_type() == TK.TOK_UPSERT_INTO: val.debug() stmt = Upsert(val) print(json.dumps(stmt.dsl(),indent=4)) res = es.update(index = stmt._index, doc_type = stmt._type, body = stmt.dsl(), **stmt.conditions) print(json.dumps(res,indent=4)) elif val.get_type() == TK.TOK_DELETE: val.debug() stmt = Delete(val) res = es.delete(index = stmt._index, doc_type = stmt._type, **stmt.conditions,ignore= 404) print(json.dumps(res,indent=4)) elif val.get_type() == TK.TOK_EXPLAIN: stmt = Explain(val) print(stmt.curl_str) print(json.dumps(stmt.dsl(),indent=4)) elif val.get_type() == TK.TOK_DESC_TABLE: stmt = Describe(val) res = es.indices.get_mapping(index = stmt._index,doc_type=stmt._type) print(res) else: res = es.cat.indices(index = 'qs_test*', v=True) val.debug() print(res)
class ElasticConnector(object): def __init__(self): ini_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../elastic.ini') config = configparser.ConfigParser() config.read(ini_file) conf_url = config.get('env', 'url') conf_verify_certs = False if config.get( 'env', 'verify_certs').lower() == "false" else True self.es = Elasticsearch(conf_url, verify_certs=False) self.settings = { "settings": { "index": { "creation_date": "1533116700171", "number_of_shards": "5", "number_of_replicas": "1", "uuid": "3sQuMexES4WE8D5f89INFA", "version": { "created": "6020399" }, "provided_name": "autonapt" } } } self.mapping = { "log": { "properties": { "client": { "properties": { "local": { "properties": { "address": { "type": "ip", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } }, "port": { "type": "long" } } }, "remote": { "properties": { "address": { "type": "ip", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } }, "geoip": { "properties": { "asn": { "properties": { "asn": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } } } }, "city": { "properties": { "divisions": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } }, "iso_code": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } }, "location": { "type": "geo_point" }, "name": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } }, "postal_code": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } } } } } }, "port": { "type": "long" } } } } }, "connection_id": { "type": "long" }, "datetime": { "type": "date", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } }, "protocol": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } }, "server": { "properties": { "local": { "properties": { "address": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } }, "port": { "type": "long" } } }, "remote": { "properties": { "address": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } }, "port": { "type": "long" } } } } }, "type": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } } } } } def create(self, index_name): if self.es.indices.exists(index=index_name): return #self.es.indices.create(index=index_name, body=self.settings) self.es.indices.create(index=index_name) self.es.indices.put_mapping(index=index_name, doc_type='log', body=self.mapping) def store(self, datas): tdatetime = dt.now() index_name = "autonapt-%s" % (tdatetime.strftime('%Y%m%d')) self.create(index_name) # 2018-08-01 09:49:53.571078 tstr = datas['datetime'] datas['datetime'] = dt.strptime(tstr, '%Y-%m-%d %H:%M:%S.%f') return self.es.index(index=index_name, doc_type="log", body=datas) def search(self, datas): return self.es.search(index="autonapt-*", body=datas) def delete(self): return self.es.indices.delete(index="autonapt-*")
for message in consumer: print('Running Consumer..') #parsed_records = [] #record = msg.value print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition, message.offset, message.key, message.value)) #print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition, # message.offset, # message.value)) #Values = record['Radhikas-MacBook-Pro.local'] #recordTime = Values['recordTime'] #print(record) #print(type(msg)) #es.index(index='monitor', doc_type='test',body=json.loads(json.dumps(docket_content))) record = json.loads(message.value) newValue = record['data'] print(newValue) #data = message.value.replace("\\", r"\\") #print(json.dumps(data)) es.index(index='monitor', doc_type='_doc', id=i, body=json.loads(newValue)) i = i + 1 #client = MongoClient('localhost:27017') #collection = client.numtest.numtest #for message in consumer: # message = message.value # collection.insert_one(message) # print('{} added to {}'.format(message, collection))
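# Indexing one document per Kafka message is the simplest approach; for higher throughput the
# messages can be buffered and flushed with helpers.bulk. Sketch only, assuming the same
# 'monitor' index and that record['data'] is a JSON string as in the loop above; the host,
# batch size and function name are illustrative, not from the original code.
from elasticsearch import Elasticsearch, helpers
import json

es = Elasticsearch([{'host': 'localhost', 'port': 9200}])   # assumed host/port

def index_in_batches(consumer, batch_size=500):
    buffer = []
    for message in consumer:
        record = json.loads(message.value)
        buffer.append({
            "_index": "monitor",
            "_type": "_doc",                      # doc type kept to match the snippet (ES 6.x style)
            "_source": json.loads(record['data']),
        })
        if len(buffer) >= batch_size:
            helpers.bulk(es, buffer)
            buffer = []
    if buffer:
        helpers.bulk(es, buffer)                  # flush whatever is left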
"fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } }, "sibsp": { "type": "long" }, "survived": { "type": "long" }, "ticket": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } } } }) # For a small dataset, you can create each document individually # Otherwise use es.bulk for item in to_json: es.index(index='titanic', doc_type='people', body=json.dumps(item))
import json var = 1 while var == 1: # Initialize Kafka consumer consumer = KafkaConsumer( bootstrap_servers= 'ec2-18-188-248-171.us-east-2.compute.amazonaws.com:9092', group_id='p3consumer', auto_offset_reset='latest') consumer.subscribe(['instagram']) # Initialize Elasticsearch client es = Elasticsearch([{'host': 'localhost', 'port': 9200}]) # Read incoming messages for message in consumer: if "Username" in message.value: # Parse message text content = message.value json_string = content.split('(data-HEAP): ')[1] stalker_report_json = json.loads(json_string) # Push the data to Elasticsearch index es.index(index='instagram_report', doc_type='report', id=stalker_report_json['ImageAnalysis']['Id'], body=stalker_report_json)
class ESStorage(LogStorage): def __init__(self, config): hosts = config.get("HOSTS") kwargs = config.get("OTHER", {}) self.index = config.get("INDEX") or 'jumpserver' self.doc_type = config.get("DOC_TYPE") or 'command_store' self.es = Elasticsearch(hosts=hosts, **kwargs) @staticmethod def make_data(command): data = dict(user=command["user"], asset=command["asset"], system_user=command["system_user"], input=command["input"], output=command["output"], risk_level=command["risk_level"], session=command["session"], timestamp=command["timestamp"]) data["date"] = datetime.fromtimestamp(command['timestamp'], tz=pytz.UTC) return data def bulk_save(self, command_set, raise_on_error=True): actions = [] for command in command_set: data = dict( _index=self.index, _type=self.doc_type, _source=self.make_data(command), ) actions.append(data) return bulk(self.es, actions, index=self.index, raise_on_error=raise_on_error) def save(self, command): """ 保存命令到数据库 """ data = self.make_data(command) return self.es.index(index=self.index, doc_type=self.doc_type, body=data) @staticmethod def get_query_body(match=None, exact=None, date_from=None, date_to=None): if date_to is None: date_to = datetime.now() if date_from is None: date_from = date_to - timedelta(days=7) time_from = date_from.timestamp() time_to = date_to.timestamp() body = { "query": { "bool": { "must": [], "must_not": [], "filter": [{ "range": { "timestamp": { "gte": time_from, "lte": time_to, } } }] } }, "sort": { "timestamp": { "order": "desc", } } } if match: for k, v in match.items(): # 默认组织的org_id为"" if k == 'org_id' and v == '': body["query"]["bool"]["must_not"].append( {"wildcard": { k: "*" }}) continue body["query"]["bool"]["must"].append({"match": {k: v}}) if exact: for k, v in exact.items(): body["query"]["bool"]["filter"].append({"term": {k: v}}) return body def filter(self, date_from=None, date_to=None, user=None, asset=None, system_user=None, input=None, session=None, risk_level=None, org_id=None): match = {} exact = {} if user: exact["user"] = user if asset: exact["asset"] = asset if system_user: exact["system_user"] = system_user if session: match["session"] = session if input: match["input"] = input if org_id is not None: match["org_id"] = org_id if risk_level is not None: match['risk_level'] = risk_level body = self.get_query_body(match, exact, date_from, date_to) # Get total count (Because default size=10) data = self.es.search(index=self.index, doc_type=self.doc_type, body=body, size=0) total = data["hits"]["total"] data = self.es.search(index=self.index, doc_type=self.doc_type, body=body, size=total) return data["hits"] def count(self, date_from=None, date_to=None, user=None, asset=None, system_user=None, input=None, session=None): match = {} exact = {} if user: exact["user"] = user if asset: exact["asset"] = asset if system_user: exact["system_user"] = system_user if session: match["session"] = session if input: match["input"] = input body = self.get_query_body(match, exact, date_from, date_to) del body["sort"] data = self.es.count(body=body) return data["count"] def __getattr__(self, item): return getattr(self.es, item) def all(self): """返回所有数据""" raise NotImplementedError("Not support") def ping(self): try: return self.es.ping() except Exception: return False
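# The filter() method above searches twice, first with size=0 to learn the total and then again
# with size=total; that second call fails once the total exceeds index.max_result_window (10,000
# by default). Sketch only (assumed alternative, not part of the original class): stream all
# matching commands with helpers.scan instead.
from elasticsearch import helpers

def iter_commands(self, body):
    body = dict(body)
    body.pop("sort", None)              # scan does not preserve sort order anyway
    return helpers.scan(
        self.es,
        query=body,
        index=self.index,
        doc_type=self.doc_type,         # doc type kept to match the ES 6.x style used above
    )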
import urllib2 import json from elasticsearch import Elasticsearch response = urllib2.urlopen( 'https://api.douban.com/v2/movie/top250?start=200&count=50') html = response.read() top_250_json = json.loads(html) es = Elasticsearch() for movie_json in top_250_json['subjects']: try: movie_url = "https://api.douban.com/v2/movie/" + movie_json['id'] response = urllib2.urlopen(movie_url, timeout=60) html = response.read() detail_movie_json = json.loads(html) print detail_movie_json res = es.index(index="douban", doc_type='movie', id=movie_json['id'], body=detail_movie_json) except: print "except!!"
# sensors 400-600 are Parking space sensors
# sensors 600-800 are Luminosity sensors
# sensors 800-1000 are Garbage sensors
sensor.value = random.randint(8, 21)            # Generate the value for Smoke in the range 8 to 21
t = str(sensor.id).encode()                     # Convert the id to bytes so it can be sent over the TCP connection
print(sensor.id)                                # Print the sensor id
s.send(t)                                       # Send the id on the TCP connection
s.send(b",")                                    # Send a comma to separate id and value
m = str(sensor.value).encode()                  # Convert the sensor value to bytes
s.send(m)                                       # Send the value on the TCP connection
s.send(b",")                                    # Send a comma to separate value and location
k = str(sensor.location).encode()               # Convert the sensor location to bytes
s.send(k)                                       # Send the location on the TCP connection
s.send(b"\n")                                   # Send a newline so the next set of values starts on a new line
sensor.id = sensor.id + 1                       # Increment the sensor id by 1
sensor.location = sensor.location + 1           # Increment the sensor location by 1
time.sleep(1)                                   # Wait for 1 second before generating the next value

# Create a document to be sent to Elasticsearch
doc = {
    'sensorid': sensor.id,
    'value': sensor.value,
    'location': sensor.location,
    'timestamp': datetime.datetime.now(),
}
res = es.index(index="iot", doc_type='smart_building', body=doc)   # Index the document in Elasticsearch
print(res['result'])                            # 'created' on success (older clients exposed this as res['created'])
if sensor.id == 1001:                           # Reset the sensor id and location after sensor 1000
    sensor.id = 901                             # Reset sensor id
    sensor.location = 901                       # Reset sensor location
s.close()
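# Whether Elasticsearch treats the timestamp above as a date depends on dynamic mapping.
# Sketch only: create the "iot" index with an explicit mapping up front so the field types are
# predictable. Index, doc type and field names come from the snippet; everything else is assumed.
if not es.indices.exists(index="iot"):
    es.indices.create(index="iot", body={
        "mappings": {
            "smart_building": {
                "properties": {
                    "sensorid":  {"type": "long"},
                    "value":     {"type": "long"},
                    "location":  {"type": "long"},
                    "timestamp": {"type": "date"},
                }
            }
        }
    })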
class ElasticsearchConnector: def __init__(self): # Initialize parameters and OpenCTI helper config_file_path = os.path.dirname( os.path.abspath(__file__)) + "/config.yml" config = (yaml.load(open(config_file_path), Loader=yaml.FullLoader) if os.path.isfile(config_file_path) else {}) self.helper = OpenCTIConnectorHelper(config) self.elasticsearch_url = get_config_variable("ELASTICSEARCH_URL", ["elasticsearch", "url"], config) self.elasticsearch_ssl_verify = get_config_variable( "ELASTICSEARCH_SSL_VERIFY", ["elasticsearch", "ssl_verify"], config, False, True, ) self.elasticsearch_login = get_config_variable( "ELASTICSEARCH_LOGIN", ["elasticsearch", "login"], config) self.elasticsearch_password = get_config_variable( "ELASTICSEARCH_PASSWORD", ["elasticsearch", "password"], config) self.elasticsearch_index = get_config_variable( "ELASTICSEARCH_INDEX", ["elasticsearch", "index"], config) if (self.helper.connect_live_stream_id is None or self.helper.connect_live_stream_id == "ChangeMe"): raise ValueError("Missing Live Stream ID") # Initilize connection to Elastic if (self.elasticsearch_login is not None and len(self.elasticsearch_login) > 0 and self.elasticsearch_password is not None and len(self.elasticsearch_password) > 0): self.elasticsearch = Elasticsearch( [self.elasticsearch_url], verify_certs=self.elasticsearch_ssl_verify, http_auth=( self.elasticsearch_login, self.elasticsearch_password, ), ) else: self.elasticsearch = Elasticsearch( [self.elasticsearch_url], verify_certs=self.elasticsearch_ssl_verify, ) def _index(self, payload): self.elasticsearch.index(index=self.elasticsearch_index, id=payload["x_opencti_id"], body=payload) def _delete(self, id): self.elasticsearch.delete(index=self.elasticsearch_index, id=id) def _process_message(self, msg): try: data = json.loads(msg.data)["data"] except: raise ValueError("Cannot process the message: " + msg) # Handle creation if msg.event == "create": self.helper.log_info("[CREATE] Processing data {" + data["x_opencti_id"] + "}") return self._index(data) # Handle update if msg.event == "update": self.helper.log_info("[UPDATE] Processing data {" + data["x_opencti_id"] + "}") return self._index(data) # Handle delete elif msg.event == "delete": self.helper.log_info("[DELETE] Processing data {" + data["x_opencti_id"] + "}") return self._delete(data["x_opencti_id"]) return None def start(self): self.helper.listen_stream(self._process_message)
host = input()
print("Port:")
puerto = int(input())                 # cast to int so the hosts dict carries a numeric port
ES_HOST = {"host": host, "port": puerto}
es = Elasticsearch(hosts=[ES_HOST])
print("Index name:")
name_index = input()
print("Doc type:")
tipo = input()
print("ID:")
identificador = input()
print("Data to update:")
print('Example: {"name": value}')
data_new = input()
#data_new = {"usuario": "desconocido"}
if es.indices.exists(index=name_index):
    resp = es.get(index=name_index, doc_type=tipo, id=identificador)
    print(resp)
    resp = es.index(index=name_index, doc_type=tipo, id=identificador, body=data_new)
    resp_get = es.get(index=name_index, doc_type=tipo, id=identificador)
    print(resp_get)
else:
    print("The index " + name_index + " does not exist")
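# The script above is meant to update a document, but es.index with an existing id replaces the
# whole document. Sketch only, assuming data_new is a JSON object containing just the fields to
# change: es.update applies a partial update instead.
import json

partial = json.loads(data_new)                       # e.g. '{"name": "new value"}'
resp = es.update(index=name_index, doc_type=tipo, id=identificador, body={"doc": partial})
print(resp)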
class HistoryConnector: def __init__(self): config_file_path = os.path.dirname( os.path.abspath(__file__)) + "/config.yml" config = (yaml.load(open(config_file_path), Loader=yaml.FullLoader) if os.path.isfile(config_file_path) else {}) self.helper = OpenCTIConnectorHelper(config) self.logger_config = self.helper.api.get_logs_worker_config() if (self.logger_config["elasticsearch_username"] is not None and self.logger_config["elasticsearch_password"] is not None): self.elasticsearch = Elasticsearch( [self.logger_config["elasticsearch_url"]], verify_certs=self. logger_config["elasticsearch_ssl_reject_unauthorized"], http_auth=( self.logger_config["elasticsearch_username"], self.logger_config["elasticsearch_password"], ), ) elif self.logger_config["elasticsearch_api_key"] is not None: self.elasticsearch = Elasticsearch( [self.logger_config["elasticsearch_url"]], verify_certs=self. logger_config["elasticsearch_ssl_reject_unauthorized"], api_key=self.logger_config["elasticsearch_api_key"], ) else: self.elasticsearch = Elasticsearch( [self.logger_config["elasticsearch_url"]], verify_certs=self. logger_config["elasticsearch_ssl_reject_unauthorized"], ) self.elasticsearch_index = self.logger_config["elasticsearch_index"] def _process_message(self, msg): try: event_json = json.loads(msg.data) unix_time = round(int(msg.id.split("-")[0]) / 1000) event_date = datetime.datetime.fromtimestamp( unix_time, datetime.timezone.utc) timestamp = event_date.isoformat().replace("+00:00", "Z") origin = event_json["origin"] if "origin" in event_json else {} history_data = { "internal_id": msg.id, "event_type": msg.event, "timestamp": timestamp, "entity_type": "history", "user_id": origin["user_id"] if "user_id" in origin else None, "applicant_id": origin["applicant_id"] if "applicant_id" in origin else None, "context_data": { "id": event_json["data"]["x_opencti_internal_id"] if "x_opencti_internal_id" in event_json["data"] else event_json["data"]["x_opencti_id"], "entity_type": event_json["data"]["type"], "from_id": event_json["data"]["x_opencti_source_ref"] if "x_opencti_source_ref" in event_json["data"] else None, "to_id": event_json["data"]["x_opencti_target_ref"] if "x_opencti_target_ref" in event_json["data"] else None, "message": event_json["message"], }, } self.elasticsearch.index(index=self.elasticsearch_index, id=msg.id, body=history_data) except elasticsearch.RequestError as err: print("Unexpected error:", err, msg) pass def start(self): self.helper.listen_stream(self._process_message)
from elasticsearch import Elasticsearch es = Elasticsearch() body = {'title': 'elastic search python client integrated'} s = es.index(index='distance', doc_type='_doc', body=body) print(s) #getData=es.get(index='distance', doc_type='_doc', id=7) #print(getData) """ body={'title':'elastic search python client integrated33335'} res=es.create(index='distance', doc_type='_doc', body=body, id='33335') print(res) """ """ body={'title':'透過python client產生中文,很多很多中文科科科'} res=es.create(index='distance', doc_type='_doc', body=body, id='33336') print(res) """ body = {"doc": {'title': 'haha透過python client產生中文,很多很多中文科科科'}} res = es.update(index='distance', doc_type='_doc', body=body, id='33336') print(res) """ body={"doc": {'title':'new 透過python client產生中文,很多很多中文科科科'}} res=es.create(index='distance', doc_type='_doc', body=body, id='33337') """ # search, we can fit lots result = es.search(index="distance", body={"query": {"match_all": {}}}) print(result)
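# The final search above uses match_all and returns every document. Sketch only, with an
# illustrative query string: a match query on the title field narrows the result set.
result = es.search(index="distance", body={
    "query": {
        "match": {"title": "python client"}          # assumed example query text
    }
})
for hit in result["hits"]["hits"]:
    print(hit["_id"], hit["_source"]["title"])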
from elasticsearch import Elasticsearch
import json
import time

# Wait until the "es" host actually answers: the Elasticsearch() constructor does not open a
# connection by itself, so the original try/except around it never detected an unreachable
# cluster. ping() is what confirms the cluster is up.
es = Elasticsearch(['es'])
while not es.ping():
    time.sleep(1)

with open('rebu/fixtures/rebu/rebu_testdata.json') as json_file:
    data = json.load(json_file)
    for obj in data:
        if obj['model'] == 'rebu.meal':
            meal = obj['fields']
            meal['id'] = obj['pk']
            es.index(index='meals_index', doc_type='meal', id=meal['id'], body=meal)

es.indices.refresh(index="meals_index")
print("Successfully loaded fixtures into ES")
data['subjectUrl'] = subject[0:subject.rindex("/")]
data['subjectValue'] = subject[subject.rindex("/") + 1:]
data['predicate'] = predicate
data['predicateUrl'] = predicate[0:predicate.rindex("/")]      # fixed key typo ("predicateteUrl")
data['predicateValue'] = predicate[predicate.rindex("/") + 1:]
data['object'] = object
if 'graduated_at' in predicate:
    # print predicate
    # print object
    if object is not None:
        data['object'] = object[0:4]                           # keep only the year
if 'object_id' in predicate:
    # print object
    # print predicate
    # print type(str(object.toPython()))
    object = str(object.toPython())
    # print object
    if map.get(object) is not None:
        data['object'] = map.get(object)
        print data.get('object')
json_data = json.dumps(data)
res = es.index(index=index_name, doc_type=index_type, body=json_data)
if counter % 100000 == 0:
    print counter

es.indices.refresh(index=index_name)
print "Indexing Done"
def find_config(user): client = MongoClient() db = client.test Scursor = db.SearchHistory.find({'user': user}) #search_hist = [] for hist in Scursor: print hist['history'] list_word = hist['history'] print list_word #csur= db.LikedPosts.find() #for dc in csur: # print dc cursor = db.config.find() user_id = 0 for doc in cursor: if user == doc['user']: break user_id = user_id + 1 #print user_id URL = [] cursor = db.config.find({'user': user}) for document in cursor: for key in document['choice']: #print 'dssd' doccat = 'doc' + key.lower().replace(' ', '_') #print doccat URL.append(("http://*****:*****@@@@@@@@@@@@" myquery = { "query": { "multi_match": { "query": " ".join(list_word), "fields": ["data", "header"] } }, "from": 0, "size": 100 } es = Elasticsearch([{'host': 'localhost', 'port': 9200}]) result = es.search(index="_all", body=myquery) max_score = 0 for rows in result['hits']['hits']: if max_score < rows['_score']: max_score = rows['_score'] for rows in result['hits']['hits']: score = (rows['_score'] / max_score) * 0.5 if len(rows['_source']['scores']) > user_id: rows['_source']['scores'][ user_id] = rows['_source']['scores'][user_id] + score else: for i in range(user_id): rows['_source']['scores'].append(0.15) rows['_source']['scores'].append(0.15 + score) jsondata = rows['_source'] #json.dumps(dict1, ensure_ascii=False) es.index(index='doc' + user.lower() + 'home', doc_type='peopleimg', id=rows['_source']['link'], body=jsondata) print URL for i in URL: myquery = { "query": { "multi_match": { "query": " ".join(list_word), "fields": ["data", "header"] } }, "from": 0, "size": 100 } try: es = Elasticsearch([{'host': 'localhost', 'port': 9200}]) result = es.search(index=i[2], body=myquery) max_score = 0 for rows in result['hits']['hits']: if max_score < rows['_score']: max_score = rows['_score'] for rows in result['hits']['hits']: #print rows['_source']['header'] score = (rows['_score'] / max_score) * 0.5 if len(rows['_source']['scores']) > user_id: rows['_source']['scores'][ user_id] = rows['_source']['scores'][user_id] + score else: for i in range(user_id): rows['_source']['scores'].append(0.15) rows['_source']['scores'].append(0.15 + score) jsondata = rows[ '_source'] #json.dumps(dict1, ensure_ascii=False) es.update(index=i[2], doc_type='peopleimg', id=rows['_source']['link'], body={"doc": jsondata}) except Exception as e: pass #print e '''
"*_Classes\\\\CLSID\\\\*" } }, { "wildcard": { "registry_key_path.keyword": "*\\\\TreatAs" } }] } } } } } res = es.search(index="logs-endpoint-winevent-*", body=doc) count = res['hits']['total']['value'] tactic = "Persistence" technique = "Component Object Model Hijacking" procedure = "Component Object Model Hijacking" tech_code = "T1197" action = { "Tactic": tactic, "Technique": technique, "Tech_code": tech_code, "Procedure": procedure, "EventCount": count, } es.index(index="represent_5", body=action, id=58)
import requests
import json
from flask import Flask, render_template, request
from elasticsearch import Elasticsearch

es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
app = Flask(__name__)

# People
i = 1
while True:
    res = requests.get('https://swapi.co/api/people/' + str(i))
    if res.status_code != 200:        # the original checked res before it was ever assigned
        break
    es.index(index='sw', doc_type='people', id=i, body=json.loads(res.content))
    i = i + 1
    print(i)

# Planets
i = 1
while True:
    res = requests.get('https://swapi.co/api/planets/' + str(i))
    if res.status_code != 200:
        break
    es.index(index='sw', doc_type='planets', id=i, body=json.loads(res.content))
    i = i + 1
    print(i)

# Starships
i = 1
while True:
    res = requests.get('https://swapi.co/api/starships/' + str(i))
    if res.status_code != 200:
        break
    es.index(index='sw', doc_type='starships', id=i, body=json.loads(res.content))
    i = i + 1
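# The three loops above are copy-pasted with only the resource name changing. Sketch only: a small
# helper (the function name is illustrative, not from the original) factors out the shared pattern.
def index_swapi_resource(resource, index='sw'):
    i = 1
    while True:
        res = requests.get('https://swapi.co/api/' + resource + '/' + str(i))
        if res.status_code != 200:
            break
        es.index(index=index, doc_type=resource, id=i, body=json.loads(res.content))
        i += 1

for resource in ('people', 'planets', 'starships'):
    index_swapi_resource(resource)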
def create_index(data):
    # connect to the Elasticsearch instance
    es = Elasticsearch("http://ec2-52-3-61-194.compute-1.amazonaws.com:9200")
    INDEX_NAME = 'parktest'
    d = {}
    d['time'] = data[0][0]
    d['garage_name'] = data[0][1]
    location = {}
    location['lat'] = data[0][2]
    location['lon'] = data[0][3]
    d['location'] = location
    d['availability'] = data[1]
    # get the document whose id is the garage name
    res = es.get(index=INDEX_NAME, doc_type=INDEX_NAME, id=data[0][1], ignore=404)
    # if the document with that id does not exist, create it
    if not res['found']:
        es.index(index=INDEX_NAME, doc_type=INDEX_NAME, id=data[0][1], body=d, refresh=True)
    else:
        # update the existing document; a dict body avoids hand-building the JSON string
        es.update(index=INDEX_NAME, doc_type=INDEX_NAME, id=data[0][1],
                  body={"doc": {"availability": data[1]}})
    return d
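# The get-then-index-or-update pattern in create_index() can be collapsed into one call with
# doc_as_upsert, which creates the document if it is missing and applies the partial update
# otherwise. Sketch only, reusing the same index, doc type and document body as above.
es.update(
    index=INDEX_NAME,
    doc_type=INDEX_NAME,
    id=data[0][1],
    body={"doc": d, "doc_as_upsert": True},
)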