Code example #1
File: storage.py Project: haandol/LSHash
class ElasticStorage(BaseStorage):
    def __init__(self, config):
        if not Elasticsearch:
            raise ImportError("elasticsearch-py is required to use Elasticsearch as storage.")
        if not Search:
            raise ImportError("elasticsearch_dsl is required to use Elasticsearch as storage.")

        self.name = 'elasticsearch'
        self.storage = Elasticsearch(**config)

    def keys(self, pattern="*"):
        return self.storage.keys(pattern)

    def set_val(self, key, val):
        body = {
            'key': key,
            'val': ','.join(map(str, val[0])),
            'extra': str(val[1])
        }
        self.storage.index(index='sift', doc_type='sift', body=body)

    def get_val(self, key):
        s = Search(using=self.storage, index='sift')
        return s.filter('term', key=key).execute().hits.hits

    def append_val(self, key, val):
        self.set_val(key, val)

    def get_list(self, key):
        return self.get_val(key)
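
For context, a minimal sketch of how this storage backend might be exercised; the config keys and the (vector, extra) shape of val are assumptions inferred from set_val above:

config = {'hosts': ['http://localhost:9200']}  # assumed kwargs for Elasticsearch(**config)
storage = ElasticStorage(config)

# set_val builds its body from a (vector, extra) pair
storage.set_val('hash_bucket_1', ([1.0, 2.0, 3.0], 'image_42.jpg'))
hits = storage.get_val('hash_bucket_1')  # raw hits from the 'sift' index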
Code example #2
File: dist.py Project: pashashocky/cuckoo-modified
    def do_es(self, results, t):
        if HAVE_ELASTICSEARCH:
            try:
                es = Elasticsearch(
                    hosts = [{
                        'host': reporting_conf.elasticsearchdb.host,
                        'port': reporting_conf.elasticsearchdb.port,
                    }],
                    timeout = 60
                )
            except Exception as e:
                raise CuckooReportError("Cannot connect to ElasticSearch DB")

            index_prefix = reporting_conf.elasticsearchdb.index

            idxdate = results["info"]["started"].split(" ")[0]
            index_name = '{0}-{1}'.format(index_prefix, idxdate)

            report = {}
            report["task_id"] = results["info"]["id"]
            report["info"]    = results.get("info")
            report["target"]  = results.get("target")
            report["summary"] = results.get("behavior", {}).get("summary")
            report["network"] = results.get("network")
            report["virustotal"] = results.get("virustotal")
            report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positives"],results["virustotal"]["total"])

            # Store the report and retrieve its object id.
            es.index(index=index_name, doc_type="analysis", id=results["info"]["id"], body=report)
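
The daily index name above is derived from the report's start timestamp; a small sketch of the same derivation in isolation, with sample values assumed:

results = {"info": {"started": "2016-05-04 12:00:01", "id": 1}}
idxdate = results["info"]["started"].split(" ")[0]    # "2016-05-04"
index_name = '{0}-{1}'.format("cuckoo", idxdate)      # "cuckoo-2016-05-04"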
Code example #3
File: search.py Project: janastu/pensieve
def createIndex():
    """This endpoint should be used to index pages of a Mouchak installation.
    FIXME:
    - Endpoint is only accessible from the index page of search service.
    - Does not support cross origin requests.
    - Better name for the function.

    """
    es = Elasticsearch()
    if not es.indices.exists(urlparse(request.form['url']).netloc):
        url = request.form['url']
        if not request.form['url'].endswith('/'):
            url = request.form['url'] + '/'
        try:
            contents = requests.get(url + "pages").json()
            for content in contents:
                es.index(index=urlparse(request.form['url']).netloc,
                         doc_type="html", body=content, id=content['id'])
            response = make_response()
            response.data = "Website indexed."
            return response
        except Exception:
            response = make_response()
            response.status_code = 204
            return response
    else:
        response = make_response()
        response.status_code = 409
        response.data = '{"reason": "Index already exists"}'
        return response
Code example #4
def loadDatainES(filename, index, doctype,dataFileType,hostname="localhost",port=9200,mappingFilePath=None,username="",password="",protocol="http"):
    try:
        print "Connecting to " + hostname + " at port:" + str(port) 
       # es = Elasticsearch([{'host': hostname, 'port': port}])

        if username != "" and password != "":
            es = Elasticsearch([protocol + '://' + username + ':' + password + '@'+hostname + ":" + str(port)],show_ssl_warnings=False)
        else:
            es = Elasticsearch([protocol + '://'+hostname + ":" + str(port)],show_ssl_warnings=False)
        
        if mappingFilePath:
            with open(mappingFilePath) as m:
                mapping = m.read()
                #print "Mapping file:" + mapping
                es.indices.create(index=index,  body=mapping,ignore=400)
                
        if dataFileType=="1":
            with open(filename) as f:   
                d = json.load(f)
                for wp in d:
                  res = es.index(index=index,doc_type=doctype,body=wp,id=wp["uri"])
                  print "indexing id: " + res["_id"] + " for uri: " + wp["uri"]
        elif dataFileType == "0":
            with open(filename) as f:
                lines = f.readlines()

                for line in lines:
                    if line.strip() != "":
                        jsonurlobj = json.loads(line.strip())
                        objkey = jsonurlobj['uri']
                        res = es.index(index=index,doc_type=doctype,body=line)
                        print "indexing id: " + res["_id"] + " for uri: " + objkey
    except Exception, e:
        stderr.write('ERROR: %s\n' % str(e))
Code example #5
def loadNerOutputs(anndir):

    #setIds = ["Citalopram-4259d9b1-de34-43a4-85a8-41dd214e9177","Escitalopram-13bb8267-1cab-43e5-acae-55a4d957630a","Fluoxetine-5f356c1b-96bd-4ef1-960c-91cf4905e6b1"]
    #setIds = ["55816042-946d-4bec-9461-bd998628ff45","c00d1607-ac36-457b-a34b-75ad74f9cf0a","70b079e2-a1f7-4a93-8685-d60a4d7c1280","38642D80-AAA6-4196-A033-3977FF35B48A"]

    ann_ner = loadJsonFromDir(anndir)
    #print len(ann_ner)

    #idx = 1
    for ann in ann_ner:
        
        #if ann["setId"] in setIds:
        dict_paras = parseSingleResource(ann)
        ann_domeo = buildAnnotation(dict_paras, SAMPLE_DOMEO)

        # load all annotations
        if ann_domeo:

            # load 11 - 208
            #if ann_domeo and (int(dict_paras["fileId"]) > 10):

            es = Elasticsearch()
            es.index(index="domeo", doc_type=COLLECTION, id=dict_paras["mongo_uuid"], body=json.dumps(ann_domeo))

            insert_annotation(dict_paras)
            print "[INFO] load annotations:" +str(ann["setId"]) 
            #print "load annotations for " + dict_paras["annotates_url"]

            #idx = idx + 1
        else:
            print "[ERROR] annotation empty"
Code example #6
    def es_index(self,p_host,p_port,p_index,p_doctype,p_docid,p_document):
        """
        Indexes a document on an elasticsearch index according to a doctype and a docid

        {p_host}   Elasticsearch server\n
        {p_port}   Port of the es server\n
        {p_index}  Name of the index to query\n
        {p_doctype}  type of the document to index\n
        {p_docid}     Id of the document to index\n
        {p_document}  Document to index\n

        | es index | localhost | 9200 | myIndex | theDocType | id_457891 | {"address":{"street":"myAddress", "city":"Wow city"}} |
        """
        
        # Es client
        try:
            param = [{'host':p_host,'port':int(p_port)}]
            es = Elasticsearch(param)
        except Exception:
            raise AssertionError("Connection error on %s:%i" % (p_host, int(p_port)))

        try:
            es.index(doc_type=p_doctype, id=p_docid, body=p_document, index=p_index)
        except Exception:
            raise AssertionError("Index error on %s:%i/%s for document : %s" % (p_host, int(p_port), p_index, p_document))
Code example #7
File: models.py Project: sebasgoldberg/fuzzysku
 def index(self):
     es = Elasticsearch()
     es.index(
         index=ES_FAMILIAS_INDEX,
         doc_type=ES_FAMILIAS_DOC_TYPE,
         id=self.index_key(),
         body=self.index_dict())
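
This model method relies on index_key() and index_dict() helpers that are not shown; a hypothetical pair for a Django-style model might look like this (field names are invented for illustration):

def index_key(self):
    # hypothetical: derive the ES document id from the primary key
    return 'familia-%s' % self.pk

def index_dict(self):
    # hypothetical: serialize the searchable fields
    return {'codigo': self.codigo, 'descripcion': self.descripcion}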
Code example #8
File: main.py Project: phamal/aageno
def addNote():
    es = Elasticsearch(['http://159.203.66.191:9200'])
    id = ""
    noteStr = ""
    if request.method == 'POST':
       id = request.form['id']
       noteStr = request.form['note']
       if len(noteStr.strip()) > 0 and len(id.strip()):
           note = {}
           note["maintag"] = id
           note["body"] = noteStr
           es.index(index="brahman", doc_type='note', id=note["maintag"], body=note)
           return redirect(url_for('index'))
    elif request.method == "GET":
       id = request.args.get("id", "")
    note = {}
    if (len(id) > 0):
        try:
            res = es.get(index="brahman", doc_type='note', id=id)
            note["title"] = id
            note["body"] = str(res['_source']['body']).strip()
        except TransportError as e:
            note["title"] = id
            note["body"] = ""

    return render_template("addNote.html", note=note)
Code example #9
def indexpage_off(url):
    resp = requests.get(url)
    soup = BeautifulSoup(resp.text, 'html.parser')
    text = soup.get_text()
    es = Elasticsearch()
    es.index(index="bc", doc_type='webpage', body={"timestamp": datetime.now(), "text": text, "url": url})
    return True
Code example #10
def scan_and_push_to_es(root_folder):
    """
    scan files under root_folder and push to ES server
    :param root_folder: folder to scan in unicode
    :return: None
    """
    assert root_folder[:2] == r'\\'

    machine = root_folder[2:][0:root_folder[2:].find('\\')]
    es = Elasticsearch()

    for root, dirs, files in os.walk(root_folder):
        for name in files:
            try:
                fullname = (os.path.join(root, name)).encode('utf-8')
                if check_if_already_exists(fullname):
                    path = os.path.dirname(fullname)
                    size = os.path.getsize(fullname.decode('utf-8'))  # buggy when long name
                    mtime = os.path.getmtime(fullname.decode('utf-8'))  # buggy when long name
                    doc = {
                        'machine': machine,
                        'path': path,
                        'full': fullname,
                        'name': name,
                        'size': size,
                        'mtime': str(datetime.fromtimestamp(int(mtime)))
                    }
                    sys.stdout.write('.')
                    es.index(index="file-index", doc_type='file', body=doc)
            except Exception:
                pass
Code example #11
File: spark.py Project: shashidonthiri9/Big-Data-
def Send_To_ElasticSearch(partition):
    
    
    print("send")
    tweets = list(partition)
    print(tweets,len(tweets))
    
    elastic_search = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    
    if(elastic_search.indices.exists(index = "location6")):
        print("if")
        if(len(tweets) != 0):
            for tweet in tweets:
                
                doc = {
                    "text": tweet['text'],
                    "location6": {
                            "lat": tweet['coordinates'][1],
                            "lon": tweet['coordinates'][0]
                            },
                    "sentiment":tweet['sentiment']
                    
                    }
                if(tweet['coordinates'][1] != 0 and tweet['coordinates'][0] !=0 ):
                    elastic_search.index(index="location6", doc_type='request-info', body=doc)
    else:
        print("else")
        mappings = {
            "mappings": {
                "request-info": {
                    "properties": {
                        "text": {
                            "type": "text"
                        },
                        "location6": {
                            "type": "geo_point"
                        },
                        "sentiment": {
                            "type": "text"
                        }
                    }
                }
            }
        }

        elastic_search.indices.create(index='location6', body=mappings)
        if(len(tweets) != 0):
            for tweet in tweets:
                
                doc = {
                    "text": tweet['text'],
                    "location6": {
                            "lat": tweet['coordinates'][1],
                            "lon": tweet['coordinates'][0]
                            },
                    "sentiment":tweet['sentiment']
                    
                    }
                if(tweet['coordinates'][1] != 0 and tweet['coordinates'][0] !=0 ):
                    elastic_search.index(index="location6", doc_type='request-info', body=doc)
Code example #12
File: lucene.py Project: henypan/eventlogger
def start_index(index_json, eventdata):
    print('start_index')
    with open(index_json) as json_file:
        index_dict = json.load(json_file, object_pairs_hook=OrderedDict)
    try:
        host_ip = index_dict['host']
        index_name = index_dict['index_name']
    except KeyError:
        sys.exit('The format of input JSON is not correct.')

    es = Elasticsearch(hosts=host_ip, timeout=120)
    host_url = 'http://' + host_ip + ':9200/'
    if not es.indices.exists(index=index_name):
        init_index(host_url, index_name, index_dict)
    data_row = dict()
    title = eventdata.question_text.strip().lower()
    number = eventdata.number
    search_query = 'Number: {}'.format(number)
    matches = es.search(index=index_name, q=search_query, size=1000)
    hits = matches['hits']['hits']
    frequencies = 1
    frequencies += len(hits)

    data_row['Title'] = title
    data_row['Number'] = number
    data_row['Difficulty'] = eventdata.difficulty
    data_row['Note'] = eventdata.note.strip().lower()
    data_row['Method'] = eventdata.method.strip().lower()
    data_row['LogTime'] = eventdata.pub_date.strftime('%Y-%m-%dT%H:%M:%S')
    es.index(index=index_name, doc_type='leet', body=data_row)
    print('Load to elasticsearch completed')
    return frequencies
Code example #13
def annotate(config, documentId):
  if "getPosTags" in config and config["getPosTags"] == False: return
  esClient = Elasticsearch(config["elasticsearch"]["host"] + ":" + str(config["elasticsearch"]["port"]))
  corpusIndex = config["corpus"]["index"]
  corpusType = config["corpus"]["type"]
  corpusFields = config["corpus"]["text_fields"]
  processorIndex = config["processor"]["index"]
  processorType = config["processor"]["type"]
  document = esClient.get(index=corpusIndex, doc_type=corpusType, id = documentId, fields=corpusFields)
  content = ""
  if "fields" in document:
    for field in corpusFields:
      if field in document["fields"]:
        if type(document["fields"][field]) is list:
          for element in document["fields"][field]:
            content += element + ". "
        else:
          content += document["fields"][field] + ". "
      
  annotatedDocument = {}
  sentences = nltk.sent_tokenize(content)
  posTaggedSentences = []
  for sentence in sentences:
    sentence = sentence.strip()
    if len(sentence) > 1:
      sentence = sentence.replace("-", " ")
      sentenceWords = nltk.word_tokenize(sentence.lower())
      sentenceWords = map(lambda x: x.replace(".", ""), sentenceWords)
      posTags = nltk.pos_tag(sentenceWords)
      posTaggedSentences.append(posTags)
  if esClient.exists(index=processorIndex, doc_type=processorType, id=document["_id"]):
    annotatedDocument = esClient.get(index=processorIndex, doc_type=processorType, id=document["_id"])["_source"]
  annotatedDocument["pos_tagged_sentences"] = posTaggedSentences
  esClient.index(index=processorIndex, doc_type=processorType, id=document["_id"], body=annotatedDocument)
  config["logger"].info("pos-processor: Annotated document '" + document["_id"] + "'")
Code example #14
class ElasticsearchUtils(object):
    def __init__(self, host_ports):
        # host_ports format: [{'host': 'xxx', 'port': 9200}, ...]
        self.host_ports = host_ports
        self.es = None

    def init_connect(self):
        self.es = Elasticsearch(self.host_ports)
        return self.es.ping()


    def get_search_result(self, index_name, type_name, query_body):
        if self.es:
            return self.es.search(index=index_name, doc_type=type_name, body=query_body)
        return

    def get_id_result(self, index_name, type_name, doc_id):
        if self.es:
            return self.es.get(index=index_name, doc_type=type_name, id=doc_id)['_source']
        return


    # If doc_id is None, let ES generate the id automatically
    def add_index_doc(self, index_name, type_name, doc_id, doc_body):
        if doc_id:
            self.es.index(index=index_name, doc_type=type_name, id=doc_id, body=doc_body)
        else:
            self.es.index(index=index_name, doc_type=type_name, body=doc_body)

    def batch_index(self, index_name, type_name, doc_body_lines):
        self.es.bulk(index=index_name, doc_type=type_name, body=doc_body_lines)
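
The batch_index method passes doc_body_lines straight to es.bulk, which expects newline-delimited JSON pairs of action and source lines; a minimal sketch of building such a body (the helper name is illustrative):

import json

def build_bulk_body(docs):
    # Each document becomes two NDJSON lines: an action line and a source line.
    lines = []
    for doc in docs:
        lines.append(json.dumps({"index": {}}))
        lines.append(json.dumps(doc))
    return "\n".join(lines) + "\n"

# utils = ElasticsearchUtils([{'host': 'localhost', 'port': 9200}])
# utils.init_connect()
# utils.batch_index('my_index', 'my_type', build_bulk_body([{'title': 'a'}, {'title': 'b'}]))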
Code example #15
File: listener.py Project: amorskoy/openrnd
    def do_POST(self):
        global  csvPath

        try:
            content_len = int(self.headers.getheader('content-length', 0))
            body = json.loads(self.rfile.read(content_len))

            dict = {"url" : body['url'], "text" : body['text']}

            es = Elasticsearch()
            es.index(index="articles", doc_type="article", body=dict)

            with open(csvPath,'ab') as fout:
                writer = UnicodeWriter(fout,quoting=csv.QUOTE_ALL)
                writer.writerow(dict.values())


            self.send_response(200)
            self.send_header("Content-type", "application/json")
            self.end_headers()
            self.wfile.write( json.dumps({"result":True}) )

        except Exception, e:

            exc_type, exc_obj, exc_tb = sys.exc_info()
            print(" Type: %s | File: %s | Line number: %s " % (exc_type, os.path.abspath(__file__), exc_tb.tb_lineno))
            print e

            self.send_response(500)
            self.send_header("Content-type", "application/json")
            self.end_headers()
            self.wfile.write( json.dumps({"result":False}) )
Code example #16
File: index_videos.py Project: unistra/pod
	def handle(self, *args, **options):	
		# Activate a fixed locale fr
		translation.activate('fr')

		es = Elasticsearch(ES_URL)
		if args:
			if args[0]=='__ALL__':
				delete = es.indices.delete(index='pod', ignore=[400, 404])
				#delete = es.delete_by_query(index="pod", doc_type='pod', body={"query":{"match_all":{}}})
				json_data = open('pods/search_template.json')   
				es_template = json.load(json_data)
				try:
					create = es.indices.create(index='pod', body=es_template)  # ignore=[400, 404]
				except TransportError as e:
					# (400, u'IndexAlreadyExistsException[[pod] already exists]')
					if e.status_code == 400:
						print "the Pod index already exists: %s" % e.error
					else:
						print "an error occurred while creating the index: %s-%s" % (e.status_code, e.error)
				from pods.views import VIDEOS
				for pod in VIDEOS:
					res = es.index(index="pod", doc_type='pod', id=pod.id, body=pod.get_json_to_index(), refresh=True)
			else:
				for pod_id in args:
					try:
						pod = Pod.objects.get(pk=int(pod_id))
					except Pod.DoesNotExist:
						raise CommandError('Pod "%s" does not exist' % pod_id)
					res = es.index(index="pod", doc_type='pod', id=pod.id, body=pod.get_json_to_index(), refresh=True)
		else:
			print "******* Warning : you must give some arguments : %s *******" %self.args
Code example #17
class IndexTalks:

    index_name = 'gc'
    doc_type = 'talk'

    def __init__(self):
        self.ft = FetchTalks()
        self.es = Elasticsearch()
        self.es_id_seq = 0
        self.confId = ''

    def _FetchIndividualTalk(self, url):
        return urllib.request.urlopen(url)

    def FetchTalksAndIndexThem(self, weekendUrl):
        self.confId, talkUrls = self.ft.FetchTalks(weekendUrl)
        print(str.format('confId: {}, num talk urls: {}', self.confId, len(talkUrls)))
        for url in talkUrls:
            handle = self._FetchIndividualTalk(url)
            self._InsertOneTalkIntoES(handle, url)

    def _GetNextId(self):
        result = self.es_id_seq
        self.es_id_seq = self.es_id_seq + 1
        return result

    def _GetTitleAndAuthor(self, line, tag, tagIndex):
        titleString = HtmlTagParser.GetTagContents(tag, line, tagIndex)
        print('title string: ' + titleString)
        titleSegments = titleString.split('-')
        title = titleSegments[0].strip()
        author = titleSegments[1].strip()
        if author.find('By') == 0:
            author = author[3:].strip()
        return ( title, author )

    def _GetTitleAuthorContent(self, talkHandle):
        title = ''
        author = ''
        titleOpenTag = '<title>'
        titleFound = False
        talkContent = ''
        for line in talkHandle:
            #strLine = str(line)
            strLine = line.decode()
            talkContent = talkContent + strLine
            if titleFound == False:
                titleIndex = strLine.find(titleOpenTag)
                if titleIndex != -1:
                    title, author = self._GetTitleAndAuthor(strLine, titleOpenTag, titleIndex)
                    titleFound = True
        return ( title, author, talkContent )

    def _InsertOneTalkIntoES(self, talkHandle, talkUrl):
        title, author, talkContent = self._GetTitleAuthorContent(talkHandle)
        idnum = self._GetNextId()
        idNumStr = str(idnum)
        print('indexing doc num: ' + idNumStr)
        json_body = json.dumps({'talkSortId': idNumStr, 'title': title, 'author': author, 'confid': self.confId, 'content': talkContent, 'url': talkUrl})
        self.es.index(index=self.index_name, doc_type=self.doc_type, id=idnum, body=json_body)
Code example #18
class StreamingIndexer(TwythonStreamer):

    def __init__(self, consumer_key=None, consumer_secret=None,
                 access_token=None, access_token_secret=None,
                 es_host=None, es_port=None, es_index=None):

        super(StreamingIndexer, self).__init__(consumer_key, consumer_secret,
                                               access_token,
                                               access_token_secret)

        self._es = Elasticsearch([{'host': es_host, 'port': es_port}])
        self._index = es_index

    def on_success(self, tweet):
        if 'delete' in tweet:
            status_id = tweet['delete']['status']['id']
            self._es.delete(self._index, 'tweet', status_id)
            return

        if 'retweeted_status' in tweet:
            tweet = tweet['retweeted_status']

        for url in tweet['entities']['urls']:
            if 'theguardian.com' in url['expanded_url']:
                url['domain'] = 'theguardian.com'

        self._es.index(index=self._index, doc_type='tweet',
                       id=tweet['id_str'], body=tweet)
Code example #19
File: t.py Project: donggry/instagram_crawler
class ScoreMerge(object):

    def __init__(self):
        self.es = Elasticsearch()
        self.count = 0
        self.category = open("/home/eunsoo/Downloads/tutorial/tutorial/category.txt", "r").read().split()

    def scoreMerge(self):
        self.es.indices.delete(index='merge', ignore=[400, 404])
        for cat in self.category:
            self.count = self.count + 1
            acc_score = 0.0
            post_count = 0
            search_results = self.es.search(index="scoretest", doc_type='categorized', body={"query": { "match": {"category": cat}}})
            if('hits' in search_results):
                #print search_results['hits']['hits']
                for search_result in search_results['hits']['hits']:
                    acc_score = acc_score + search_result['_source']['score']
                    post_count = post_count + 1

                self.es.index(index="mergetest", doc_type="merge", id = cat,
                              body={"category": cat, "acc_score": acc_score, "post_count" : post_count })
                print cat + " successfully merged"

    def displaySortedCategory(self):
        search_results = self.es.search(index="mergetest", doc_type='merge',
                                        body={"sort": {"acc_score": {"order": "desc"}}})

        print search_results
        #print "category / acc_likes / acc_comments_count / acc_score / post_count"
        for search_result in search_results['hits']['hits']:
            print ("%s/%10f/%10f" %(search_result['_source']['category'],search_result['_source']['acc_score'],search_result['_source']['post_count']))
Code example #20
File: utils.py Project: glenbot/hacker_monthly
def index_data(index_name, epub_path):
    """Index magazine data"""
    es_client = Elasticsearch()
    files = list_epub_files(epub_path)

    for _file in files:
        data = parse_epub(_file)
        magazine_title = data['title']
        articles = data['articles']
        print 'Indexing {} articles in {}'.format(len(articles), magazine_title)

        for article in articles:
            document = {
                'name': magazine_title,
                'title': article['title'],
                'author': article['author'],
                'content': article['content']
            }

            try:
                es_client.index(index=index_name, doc_type='articles', body=document)
            except ConnectionError, e:
                sys.exit(e.error)
            except RequestError, e:
                sys.exit(e.error)
Code example #21
File: PdfIndexer.py Project: zuloo/frisc
def index(text, meta, options):
    es = Elasticsearch()
    document_id = meta.get('filesystem_absolute_path', '')
    try:
        result = es.index(
            index=index,
            doc_type="document",
            id=document_id,
            body=meta
        )
    except es_exceptions.TransportError as es_error:
        print(es_error)
        return
    print(result)
    document_id = result.get('_id')
    count = 1
    for page in text:
        try:
            result = es.index(
                index=index,
                doc_type="page",
                parent=document_id,
                id="{}_page{}".format(document_id, count),
                body={"content": page}
            )
        except es_exceptions.TransportError as es_error:
            print(es_error)
            continue
        print(result)
        count += 1
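
Indexing pages with parent=document_id assumes a parent/child relation between the two doc types; a sketch of a mapping that would back it on older ES versions (the index name is illustrative):

es.indices.create(index="documents", body={
    "mappings": {
        "document": {},
        "page": {"_parent": {"type": "document"}}  # ties each page to its parent document
    }
})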
Code example #22
File: loadDatainES.py Project: rajagopal067/testrepo
def loadDatainES(filename,index,doctype,dataFileType,hostname="localhost",port=9200,mappingFilePath=None):
    try:
        print "Connecting to "+hostname + " at port: " + str(port)
        es=Elasticsearch(['http://localhost:9200'])

        if mappingFilePath:
            with open(mappingFilePath) as m:
                mapping = m.read()
                es.indices.create(index=index,body=mapping,ignore=400)

        if dataFileType==1:
            with open(filename) as f:
                data = json.load(f)
                for line in data:
                    es.index(index=index,doc_type=doctype,body=line)
                print "done indexing the json file"

        elif dataFileType==0:
            with open(filename) as f:
                lines = f.readlines()
                for line in lines:
                    if line.strip() != "":
                        json.loads(line.strip())
                        es.index(index=index,doc_type=doctype,body=line)

                print "done indexing the given json file"
    except Exception, e:
        stderr.write('ERROR: %s\n' % str(e))
Code example #23
def add_aggregated_info_to_elasticsearch(aggregation_record):
    '''
            The method adds the aggregation record to elasticsearch
            Args:
                aggregation_record: a (window_start_time, event_type, count) tuple
            Returns:
                jsonRecord built from the record (with id field added)
    '''
    from elasticsearch import Elasticsearch
    es = Elasticsearch(['localhost'])
    # Add id field
    jsonRecord = {}
    #print(aggregation_record)
    id = str(aggregation_record[0]) + str(aggregation_record[1])
    jsonRecord['id'] = id
    jsonRecord['window_start_time'] = aggregation_record[0]
    jsonRecord['event_type'] = aggregation_record[1]
    jsonRecord['count'] = aggregation_record[2]
    try:
        es.index(index="events_aggregation", doc_type="events_aggregation", id=id, body=jsonRecord)
        print(jsonRecord)
    except Exception as e:
        print("Exception in es")
        print(e)
    return jsonRecord
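
A quick illustration of the record shape this function expects and the document it produces (sample values assumed):

record = ('2018-06-01T10:00:00', 'login_failure', 17)
doc = add_aggregated_info_to_elasticsearch(record)
# doc == {'id': '2018-06-01T10:00:00login_failure',
#         'window_start_time': '2018-06-01T10:00:00',
#         'event_type': 'login_failure',
#         'count': 17}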
Code example #24
File: tika_tasks.py Project: MJJoyce/memex-explorer
def create_index(index):
    es = Elasticsearch([ELASTICSEARCH_HOST])
    files = [os.path.join(index.data_folder, x) for x in os.listdir(index.data_folder)]
    if es.indices.exists(index.slug):
        print("Deleting '%s' index" % index.slug)
        res = es.indices.delete(index=index.slug)
        print("  response: '%s'" % res)

    stopwords = []

    for f in files:
        #Using experimental tika library - just a little janky
        response = parse('all', f, TIKA_ENDPOINT)[1]
        try:
            if response[0] == '[':
                #Sometimes response comes in brackets
                parsed = json.loads(response[1:-1])
            else:
                #Sometimes not.
                parsed = json.loads(response)
            content, features = process_content(parsed["X-TIKA:content"], stopwords)
            parsed["X-TIKA:cleaned"] = content
            for kw, val in features.items():
                parsed["has_" + re.sub(' ', '_', kw)] = val
            #parsed["authors"] = process_authors(parsed["X-TIKA:content"])
            es.index(index=index.slug,
                     doc_type="autonomy",
                     body = parsed,
                     )
        except Exception as e:
            #Strange errors coming from new tika parser
            #Just move on to the next document
            print e
            pass
Code example #25
File: welast.py Project: prodja/djascralog
	def handle(self, *args, **options):

		es = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

		fop=open('spider/management/commands/'+str(argv[2]), 'r')
		inds = IndicesClient(es)

		mapping={ "mappings": { "product_type":  {  "properties": { "code": { "type" : "string" },"name": {"type" : "string"},"img": {"type" : "string"},"url": {"type" : "string"},"price_reg": {"type" : "float"},"price_discount": {"type" : "float"}}}}}

		if not inds.exists(index='gearbest_index'):
			inds.create(index='gearbest_index',body=mapping)
			print 'gearbest_index created'

		for jsonline in fop:
			jobj=loads(jsonline)
			del jobj["_type"]
			es.index(index="gearbest_index",doc_type='product_type', body=jobj, id=jobj['code'])
			
			disc=0
			reg=0

			if len(jobj['price_discount'])>0:
				disc  = float(jobj['price_discount'][0])
			if len(jobj['price_reg'])>0:
				reg  = float(jobj['price_reg'][0])

			#insert="INSERT into 'price_gb' ('price','price_disc','code','date') values ("+str(reg)+", "+str(disc)+", '"+str(jobj['code'])+"', '"+str(datetime.today())+"')"
			#cursor = connection.cursor()
			#cursor.execute(insert)

			add_price=Price_gb(price=reg,price_disc=disc,code=str(jobj['code']),date=datetime.date.today())
			add_price.save()

			print 'code='+str(jobj['code'])
Code example #26
class ChoutiElasticsearch(object):

    def __init__(self):
        self.es = Elasticsearch()

    def create_index(self, title, url):
        self.es.index(index="chouti", doc_type="chouti-type", body={"title": title, "url": url, "timestamp": datetime.now()})
Code example #27
class TestMemcachedConnection(ElasticTestCase):
    def setUp(self):
        try:
            import pylibmc
        except ImportError:
            raise SkipTest("No pylibmc.")
        super(TestMemcachedConnection, self).setUp()
        nodes = self.client.nodes.info()
        for node_id, node_info in nodes["nodes"].items():
            if 'memcached_address' in node_info:
                connection_info = ADDRESS_RE.search(node_info['memcached_address']).groupdict()
                self.mc_client = Elasticsearch(
                    [connection_info],
                    connection_class=MemcachedConnection
                )
                break
        else:
            raise SkipTest("No memcached plugin.")

    def test_index(self):
        self.mc_client.index("test_index", "test_type", {"answer": 42}, id=1)
        self.assertTrue(self.client.exists("test_index", doc_type="test_type", id=1))

    def test_get(self):
        self.client.index("test_index", "test_type", {"answer": 42}, id=1)
        self.assertEquals({"answer": 42}, self.mc_client.get("test_index", doc_type="test_type", id=1)["_source"])

    def test_unicode(self):
        self.mc_client.index("test_index", "test_type", {"answer": u"你好"}, id=u"你好")
        self.assertEquals({"answer": u"你好"}, self.mc_client.get("test_index", doc_type="test_type", id=u"你好")["_source"])

    def test_missing(self):
        self.assertRaises(NotFoundError, self.mc_client.get, "test_index", doc_type="test_type", id=42)
Code example #28
class ElasticSearchProvider(object):
    """
    Full-text search SDK for elasticsearch
    """

    def __init__(self, hosts=None):
        self._es = Elasticsearch(hosts)

    def insert(self, index, doc_type, doc):
        """
        :arg index: the ES _index
        :arg doc_type: the ES _type
        :arg doc: the doc to insert
        """
        res = self._es.index(index, doc_type, doc, doc['id'])
        return res['created']

    def update(self, index, doc_type, doc):
        """
        :arg index: the ES _index
        :arg doc_type: the ES _type
        :arg doc: the doc to update
        """
        self._es.index(index, doc_type, doc, doc['id'])
        return True

    def delete(self, index, doc_type, doc):
        """
        :arg index: the ES _index
        :arg doc_type: the ES _type
        :arg doc: the doc to delete
        """
        res = self._es.delete(index, doc_type, doc['id'])
        return res['found']
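
A minimal usage sketch, assuming a local node and made-up index/type names; note that insert and update both go through es.index, so an update simply re-indexes the document under the same id:

provider = ElasticSearchProvider(hosts=['localhost:9200'])
provider.insert('products', 'product', {'id': 1, 'name': 'widget'})
provider.update('products', 'product', {'id': 1, 'name': 'widget v2'})
provider.delete('products', 'product', {'id': 1})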
Code example #29
    def es_index(self,p_host,p_port,p_index,p_doctype,p_docid,p_document):
        """
        === Indexes a Document by Doctype and Docid ===
        
        Indexes a Document on an elasticsearch index according to a doctype and a docid

        - ``p_host`` - Elasticsearch server
        - ``p_port`` - Port of the es server
        - ``p_index`` - Name of the index to query
        - ``p_doctype`` - Type of the document to index
        - ``p_docid`` - Id of the document to index
        - ``p_document`` - Document to index

        | es index | localhost | 9200 | myIndex | theDocType | id_457891 | {"address": {"street": "myAddress", "city":"Wow city"}} |
        """
        
        # Es client
        try:
            param = [{'host':p_host,'port':int(p_port)}]
            es = Elasticsearch(param)
        except Exception:
            raise AssertionError("Connection error on %s:%i" % (p_host, int(p_port)))

        try:
            es.index(doc_type=p_doctype, id=p_docid, body=p_document, index=p_index)
        except Exception:
            raise AssertionError("Index error on %s:%i/%s for document : %s" % (p_host, int(p_port), p_index, p_document))
Code example #30
def install(fileCheckKey):
	elasticLatest='6.2.4'
	#Install Elasticsearch
	elasticInstalled=False
	if os.path.isfile('/etc/elasticsearch/elasticsearch.yml'):
		os.popen('sudo service elasticsearch start').read()
		while True:
			elasticVersion=os.popen("curl -XGET '127.0.0.1:9200'").read()
			try:
				jsonStuff=json.loads(elasticVersion)
				if jsonStuff['tagline'] == "You Know, for Search":
					elasticVersion=jsonStuff['version']['number']
					break
				else:
					print "Waiting for Elasticsearch to start..."
			except:
				print "Exception: Waiting for Elasticsearch to start..."
			sleep(10)
		if elasticLatest == elasticVersion.rstrip():
			elasticInstalled=True
	if elasticInstalled == False:
		print "Installing Elasticsearch"
		print "  Downloading Elasticsearch 6.2.4"
		os.popen('sudo wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-6.2.4.deb 2>&1').read()
		if not os.path.isfile('elasticsearch-6.2.4.deb'):
			sys.exit('Error downloading elasticsearch')
		if not hashCheck.checkHash('elasticsearch-6.2.4.deb'):
			sys.exit('Error downloading elasticsearch, mismatched file hashes')
		print "  Installing Elasticsearch"
		os.popen('sudo dpkg -i elasticsearch-6.2.4.deb').read()
		print "  Cleaning Up Installation Files"
		os.remove('elasticsearch-6.2.4.deb')
		os.popen('sudo update-rc.d elasticsearch defaults').read()
		#Reduce the JVM heap size from the default 2g to 256m
		shutil.move('/etc/elasticsearch/jvm.options','/etc/elasticsearch/jvm.orig')
		with open("/etc/elasticsearch/jvm.orig", "rt") as fileIn:
			with open("/etc/elasticsearch/jvm.options", "wt") as fileOut:
				for line in fileIn:
					if line.rstrip() == "-Xms2g":
						fileOut.write('-Xms256m\n')
					elif line.rstrip() == "-Xmx2g":
						fileOut.write('-Xmx256m\n')
					else:
						fileOut.write(line)
		print "  Starting Elasticsearch"
		os.popen('sudo systemctl enable elasticsearch.service').read()
		os.popen('sudo service elasticsearch start').read()
		#Sleeping 10 seconds to begin with to give it time to startup.
		sleep(10)
		while True:
			#writeSsIndex = os.popen(
			#	'curl -XPUT \'127.0.0.1:9200/sweet_security?pretty\' -H \'Content-Type: application/json\' -d\' {"mappings" : {"ports" : {"properties" : {"mac" : {"type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "port" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}},"protocol" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}},"name" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}},  "product" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "version" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "lastSeen": { "type" : "date" }}}, "devices" : { "properties" : { "hostname" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "nickname" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "ip4" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "mac" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "vendor" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "ignore" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "active" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "defaultFwAction" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "isolate" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "firstSeen" : { "type" : "date" }, "lastSeen" : { "type" : "date" }}}, "firewallProfiles" : { "properties" : { "mac" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "destination" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, "action" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}}}}}\'').read()
			ssIndex='curl -XPUT \'127.0.0.1:9200/sweet_security?pretty\' -H \'Content-Type: application/json\' -d\'' \
					' {"mappings" : {' \
					'   "ports" : {"properties" : {' \
					'     "mac" : {"type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "port" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}},' \
					'     "protocol" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}},' \
					'     "name" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}},  ' \
					'     "product" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "version" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "lastSeen": { "type" : "date" }}}, ' \
					'   "devices" : { "properties" : { ' \
					'     "hostname" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "nickname" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "ip4" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "mac" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "vendor" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "ignore" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "active" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "defaultFwAction" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "isolate" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "firstSeen" : { "type" : "date" }, ' \
					'     "lastSeen" : { "type" : "date" }}}, ' \
					'   "firewallProfiles" : { "properties" : { ' \
					'     "mac" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "destination" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "action" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}}},' \
					'   "sensors" : { "properties" : { ' \
					'     "mac" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "sensorName" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "broHealth" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "logstashHealth" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "diskUsage" : { "type" : "integer"}, ' \
					'     "memAvailable" : { "type" : "integer"}, ' \
					'     "memPercent" : { "type" : "integer"}, ' \
					'     "memConsumed" : { "type" : "integer"}, ' \
					'     "firstSeen" : { "type" : "date" }, ' \
					'     "lastSeen" : { "type" : "date" }}} ' \
					'}}\''
			writeSsIndex = os.popen(ssIndex).read()

			try:
				jsonSS = json.loads(writeSsIndex)
				if jsonSS['acknowledged'] == True:
					print "  sweet_security index created"
					break
				else:
					print "Waiting for Elasticsearch to start, will try again in 10 seconds..."
			except:
				print "Error: Waiting for Elasticsearch to start, will try again in 10 seconds..."
			# Sleep 10 seconds to give ES time to get started
			sleep(10)
		while True:
			ssAlertIndex= 'curl -XPUT \'localhost:9200/sweet_security_alerts?pretty\' -H \'Content-Type: application/json\' -d\'{ ' \
				'  "mappings" : { ' \
				'    "alerts" : { "properties" : {  ' \
				'      "source" : { "type" : "text", "fields": {"raw": {"type": "keyword"}}}, ' \
				'      "message" : { "type" : "text", "fields": {"raw": {"type": "keyword"}}},  ' \
				'      "mac" : {"type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
				'      "firstSeen" : { "type" : "date" }, ' \
				'      "addressedOn" : { "type" : "date" }, ' \
				'      "addressed" : { "type" : "integer"}' \
				'}}}}\''
			writeSsAlertIndex = os.popen(ssAlertIndex).read()
			try:
				jsonSSAlert = json.loads(writeSsAlertIndex)
				if jsonSSAlert['acknowledged'] == True:
					print "  sweet_security_alert index created"
					break
				else:
					print "Waiting for Elasticsearch to start, will try again in 10 seconds..."
			except:
				print "Error: Waiting for Elasticsearch to start, will try again in 10 seconds..."
			# Sleep 10 seconds to give ES time to get started
			sleep(10)
		try:
			try:
				from elasticsearch import Elasticsearch
			except:
				pass
			esService = Elasticsearch()
			if fileCheckKey is None:
				configData = {'defaultMonitor': 0, 'defaultIsolate': 0, 'defaultFW': 1, 'defaultLogRetention': 0}
			else:
				configData = {'defaultMonitor': 0, 'defaultIsolate': 0, 'defaultFW': 1, 'defaultLogRetention': 0,
							  'fileCheckKey': fileCheckKey}
			#Sleep a second to make sure index has fully created in ES
			sleep(1)
			esService.index(index='sweet_security', doc_type='configuration', body=configData)

		except Exception, e:
			print e
			pass
		while True:
			tardisIndex='curl -XPUT \'localhost:9200/tardis?pretty\' -H \'Content-Type: application/json\' -d\'' \
					' {"mappings" : {' \
					'   "known_dnsqueries" : {"properties" : {' \
					'     "mac" : {"type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "query" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}}},' \
					'   "known_websites" : { "properties" : { ' \
					'     "mac" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "server_name" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}}}, ' \
					'   "firewallProfiles" : { "properties" : { ' \
					'     "mac" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "ip" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}, ' \
					'     "port" : { "type" : "text", "fields": {"keyword": {"type": "keyword"}}}}}' \
					'}}\''
			writeTardisIndex = os.popen(tardisIndex).read()
			#writeTardisIndex = os.popen('curl -XPUT \'localhost:9200/tardis?pretty\' -H \'Content-Type: application/json\' -d\' {"mappings" : {"known_hosts" : {"properties" : { "mac" : { "type" : "text", "fields": {"raw": {"type": "keyword"}}},"destination" : { "type" : "text", "fields": {"raw": {"type": "keyword"}}},"port" : { "type" : "text", "fields": {"raw": {"type": "keyword"}}}}}}}\'').read()
			try:
				jsonSS = json.loads(writeTardisIndex)
				if jsonSS['acknowledged'] == True:
					print "  tardis index created"
					break
				else:
					print "Waiting for Elasticsearch to start, will try again in 10 seconds..."
			except:
				print "Error: Waiting for Elasticsearch to start, will try again in 10 seconds..."
			# Sleep 10 seconds to give ES time to get started
			sleep(10)
Code example #31
class ElasticsearchWrapper:
    '''
    Wrapper around Elasticsearch calls
        - Assumes the Elasticsearch service itself is already up and running
        - The Python "elasticsearch" module must be installed beforehand
    '''
    def __init__(self, doc_type:str, index:str):
        '''
        Initialization

        Parameters
        ----------
        doc_type : str
            Name of the document type
        index : str
            Name of the index
        '''
        self.es=Elasticsearch("localhost:9200")
        self.doc_type=doc_type
        self.index=index
    
    def delete_index(self):
        '''
        Delete the index if it already exists
        '''
        try:
            self.es.indices.delete(index=self.index)
        except:
            pass

    def make_index(self, setting:dict, mapping:dict):
        '''
        Create an index in Elasticsearch

        Parameters
        ----------
        setting : dict
            JSON data specifying the settings
        mapping : dict
            JSON data specifying the mapping
        '''
        # Create the index with the given settings
        self.es.indices.create(index=self.index, body=setting)

        # Set the mapping on the created index
        self.es.indices.put_mapping(index=self.index, doc_type=self.doc_type, body=mapping)

    def insert_one(self, doc:dict):
        '''
        Register a single document

        Parameters
        ----------
        doc : dict
            JSON data to register
        '''
        self.es.index(index=self.index, doc_type=self.doc_type, body=doc)
        # Passing a sequential number as id would make documents retrievable by id;
        # with no id, ES assigns an arbitrary unique string internally.
        # Documents are never fetched by insertion order here, and searches use
        # fields inside doc, so registering without an id is fine.
        #self.es.index(index=self.index, doc_type=self.doc_type, body=doc, id=idx)

    def insert_array(self, docs:list):
        '''
        Register an array of documents

        Parameters
        ----------
        docs : list of dict
            Array of JSON data to register
        '''
        for doc in docs:
            self.es.index(index=self.index, doc_type=self.doc_type, body=doc)

    def search_and(self, items:dict, count:int = 10):
        '''
        Search with an AND condition over the (name, value) items in the dictionary

        Parameters
        ----------
        items : dict
            Mapping of field names to values
        count : int
            Maximum number of results; defaults to 10 when unspecified
        '''
        query = {
            "query": {
                "bool" : {
                    "must":[{"match":{key : val}} for key, val in items.items()]
                }
            }
        }

        return self.__search(query, count)

    def __search(self, query:dict, count:int):
        '''
        Search Elasticsearch with the query expression given in query

        Parameters
        ----------
        query : dict
            Elasticsearch search query
        count : int
            Maximum number of results
        '''
        results = []
        params = {
            'size':count
        }
        for i in self.es.search(index=self.index, doc_type=self.doc_type, body=query, params=params)["hits"]["hits"]:
            body = copy.deepcopy(i["_source"])
            score = i['_score']
            result = {'body':body, 'score':score}
            results.append(result)
        return results
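
A minimal usage sketch of this wrapper; the index name, mapping, and sample documents are made up for illustration:

wrapper = ElasticsearchWrapper(doc_type="article", index="articles")
wrapper.delete_index()
wrapper.make_index(
    setting={"settings": {"number_of_shards": 1, "number_of_replicas": 0}},
    mapping={"properties": {"title": {"type": "text"}, "tags": {"type": "keyword"}}})
wrapper.insert_array([
    {"title": "intro to search", "tags": ["es"]},
    {"title": "advanced search", "tags": ["es", "query"]},
])
hits = wrapper.search_and({"title": "search", "tags": "es"}, count=5)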
Code example #32
count = es.count(index=index_name)['count']
logging.info("Document Count: %s", str(count))
article_id = 1
if not (count > 0):
    logging.info("Documents deleted. Commencing re-indexing..")
    doc_list = list_blobs_with_prefix('pubmedbot', 'txt/')
    for doc in doc_list:
        file_name = doc.name
        download_blob('pubmedbot', file_name, file_name)
        pmid = file_name.split('.')[0]
        with open(file_name, 'r') as f:
            text = f.read()
        new_text = re.sub(r'\n\n.*et al\n', "", text)
        new_text = re.sub(r'\.\n\n', ".\n", new_text)
        new_text = re.sub(r'Page [0-9]+ of [0-9]+\n', "", new_text)
        new_text = clean_unidentified_characters(new_text)
        new_text = re.sub(r'(\n )+', "\n", new_text).strip()
        new_text = re.sub(r'\n+', "\n", new_text).strip()
        paragraphs = re.split(r'\.( )*\n', new_text)
        dicts = []
        for para in paragraphs:
            article = {'pmid': pmid, 'text': para}
            try:
                es.index(index=index_name, id=article_id, body=article)
            except RequestError as e:
                with open('error.log', 'a+') as f:
                    f.write(str(e))
                continue
            article_id = article_id + 1
        os.remove(file_name)
Code example #33
                    scroll='3m',
                    size=10000)
    #res = es.search(index="not_busy_list_airtel", doc_type='class', body={
    #    "query": {"bool": {"must": [{"term": {"startdate": busy_list[i]}}, {"term": {"today": today}}]}}},
    #                scroll='3m', size=10000)
    # Search not_busy_list for every entry whose departure date falls during the peak season
    for doc in res['hits']['hits']:
        # Each search hit found is referred to as doc
        res1 = es.get(index="not_busy_list_airtel",
                      doc_type='class',
                      id=doc['_id'])
        # Fetch the ids that contain peak-season departure products
        doc1 = res1['_source']
        # doc1 holds the contents of the peak-season departure products
        res2 = es.index(index="busydays_airtel",
                        doc_type='class',
                        id=doc['_id'],
                        body=doc1)
        # Create the busydays index and insert the peak-season products' contents under the same ids
        es.delete(index="not_busy_list_airtel",
                  doc_type='class',
                  id=doc['_id'])
        # Delete the ids of peak-season departures from not_busy_list
        # This completes the peak-season filter

for i in range(0, len(today_list)):
    # Delete all of the products six months out
    datetime_object = datetime.datetime.strptime(today_list[i], '%Y-%m-%d')
    #datetime_object = datetime.datetime.strptime(today, '%Y-%m-%d')
    five_after = monthdelta(datetime_object, 5)
    six_after = monthdelta(datetime_object, 6)
    firstday = five_after.replace(day=1)
Code example #34
# Connect to ES
es = Elasticsearch([{'host': 'localhost', 'port': 9200}])

# Check whether the 'person' index exists
if not es.indices.exists(index=INDEX_NAME):
    # Create the index
    es.indices.create(index=INDEX_NAME)

# Creating the index is not strictly required - if it does not exist, it is created automatically when the first document is inserted

# 1. Insert a person named John
print_delimiter(1)
person = {
    "firstname": "John",
}
print(es.index(index=INDEX_NAME, id=1, body=person))

# 2. Print the created person (using get with the id parameter)
print_delimiter(2)
print(es.get(index=INDEX_NAME, id=1))

# 3. Print all persons (using search)
print_delimiter(3)
print(es.search(index=INDEX_NAME, body={'query': {'match_all': {}}}))

# 4. Rename the created person to 'Jane'
print_delimiter(4)
print(es.update(index=INDEX_NAME, id=1, body={"doc": {"firstname": "Jane"}}))
print(es.get(index=INDEX_NAME, id=1))

# 5. Delete the created person
print_delimiter(5)
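
A sketch of step 5, following the same conventions as the calls above:

print(es.delete(index=INDEX_NAME, id=1))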
Code example #35
File: Index.py Project: zzzz123321/elasticsearch-py
'''
Licensed to Elasticsearch B.V under one or more agreements.
Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
See the LICENSE file in the project root for more information
'''

from elasticsearch import Elasticsearch

es = Elasticsearch()

print("bb143628fd04070683eeeadc9406d9cc - L:11")
# tag::bb143628fd04070683eeeadc9406d9cc[]
response = es.index(index='twitter',
                    id=1,
                    body={
                        'user': '******',
                        'post_date': '2009-11-15T14:12:12',
                        'message': 'trying out Elasticsearch',
                    })
# end::bb143628fd04070683eeeadc9406d9cc[]
print("---------------------------------------")
print(response)
print("---------------------------------------")

print("804a97ff4d0613e6568e4efb19c52021 - L:77")
print("TODO")

print("d718b63cf1b6591a1d59a0cf4fd995eb - L:121")
# tag::d718b63cf1b6591a1d59a0cf4fd995eb[]
response = es.index(
    index='twitter',
Code example #36
 def process_item(self, item, spider):
     es = Elasticsearch(self.es_hosts)
     es.index(index=self.index_name, doc_type=self.index_type, body=json.dumps(dict(item), ensure_ascii=False, default=json_serial).encode("utf-8"))
     # es.index(index=self.index_name, doc_type=self.index_type, pipeline=self.ingest_pipeline, body=json.dumps(dict(item), ensure_ascii=False, default=json_serial).encode("utf-8"))
     return item
Code example #37
File: datastore.py Project: theshiv303/aleph
class DataStore(object):

    es = None
    tracer = None

    def __init__(self):

        self.es = Elasticsearch(ELASTICSEARCH_URI)
        self.tracer = logging.getLogger('elasticsearch.trace')

        if ELASTICSEARCH_TRACE:
            self.tracer.setLevel(logging.DEBUG)
            self.tracer.addHandler(logging.FileHandler(LOGGING['filename']))
        else:
            self.tracer.addHandler(logging.NullHandler())

    def update(self, doc_id, partial_body):
        self.es.update(index=ELASTICSEARCH_INDEX,
                       id=doc_id,
                       doc_type='sample',
                       body={'doc': partial_body})

    def setup(self):
        self.es.indices.create(index=ELASTICSEARCH_INDEX,
                               ignore=400)  # Ignore already exists

    def count(self, q=None):

        if q:
            result = self.es.count(index=ELASTICSEARCH_INDEX,
                                   doc_type='sample',
                                   q=q)
        else:
            result = self.es.count(index=ELASTICSEARCH_INDEX,
                                   doc_type='sample')
        return result['count']

    def all(self, size=10, start=0):
        try:
            result = self.es.search(index=ELASTICSEARCH_INDEX,
                                    doc_type='sample',
                                    body={
                                        'query': {
                                            'match_all': {},
                                        },
                                        'from': start,
                                        'size': size,
                                        "sort": {
                                            "timestamp": {
                                                'order': 'desc'
                                            },
                                        }
                                    })
        except NotFoundError:
            pass
        except Exception:
            raise

        return result

    def lucene_search(self, query, start=0, size=15):

        try:
            body = {
                "sort": {
                    "timestamp": {
                        'order': 'desc'
                    },
                }
            }
            result = self.es.search(index=ELASTICSEARCH_INDEX,
                                    doc_type='sample',
                                    q=query,
                                    from_=start,
                                    size=size,
                                    body=body)
        except NotFoundError:
            pass
        except Exception:
            raise

        return result

    def search(self, query):

        result = []

        try:
            result = self.es.search(index=ELASTICSEARCH_INDEX,
                                    doc_type='sample',
                                    body={'query': {
                                        'term': query
                                    }})
        except NotFoundError:
            pass
        except Exception:
            raise

        return result

    def save(self, doc_data, doc_id):
        return self.merge_document('samples', 'sample', doc_data, doc_id)

    def get(self, doc_id):

        return self.es.get(index='samples', doc_type='sample',
                           id=doc_id)['_source']

    def merge_document(self, index, doc_type, doc_data, doc_id):

        try:
            self.es.indices.refresh(index)
        except Exception as e:
            raise IOError("Error updating ES index %s (%s)" % (index, e))

        original_document = {}

        # Try to get the current document if it exists
        try:
            response = self.es.get(index=index,
                                   doc_type=doc_type,
                                   id=doc_id)
            # es.get() returns a 'found' flag, not 'hits'; only take the
            # body when the document was actually found
            if response.get('found'):
                original_document = response['_source']
        except NotFoundError:
            pass  # not found, proceed with an empty document

        if len(original_document) == 0:
            return self.es.index(index=index,
                                 doc_type=doc_type,
                                 body=doc_data,
                                 id=doc_id)

        # Merge and index
        merged_document = dict_merge(original_document, doc_data)

        return self.es.index(index=index,
                             doc_type=doc_type,
                             body=merged_document,
                             id=doc_id)
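A minimal usage sketch of the merge flow above (the id and document bodies are made up; it assumes ELASTICSEARCH_URI is configured and that ELASTICSEARCH_INDEX matches the 'samples' index hard-coded in save()):

store = DataStore()
store.setup()

# The second save() goes through merge_document(): the new keys are
# merged into the stored _source and re-indexed under the same id.
store.save({'md5': 'd41d8cd98f00b204e9800998ecf8427e', 'tags': ['pe']}, 'sample-1')
store.save({'size': 4096}, 'sample-1')
print(store.get('sample-1'))  # contains md5, tags and size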
コード例 #38
0
# Created on 29.06.18

from datetime import datetime
import json
from elasticsearch import Elasticsearch

es = Elasticsearch('elk.hogwarts.servida.ch:9200')

with open(
        "../../2018-05-17T10_54_28/server_stream_post_requests.json") as file:
    requests = json.load(file)

# es.indices.create(index='ismartalarm-dfrws', body={
#    'settings': {
#          'index': {
#               'number_of_shards': 1,
#               'number_of_replicas': 0
#          }
#    }
# })

i = 0
for path in requests:
    for request in requests[path]:
        print(request)
        es.index(index='ismartalarm-dfrws',
                 doc_type='post_requests',
                 id=i,
                 body=request)
        i += 1
print(i)
コード例 #39
0
  ELASTICSEARCH_HOST = url.hostname
  ELASTICSEARCH_AUTH = url.username + ':' + url.password
  es = Elasticsearch([{'host': ELASTICSEARCH_HOST}], http_auth=ELASTICSEARCH_AUTH)
else:
  es = Elasticsearch()

files_given = sys.argv
for file_name in files_given:
  if file_name == 'index_addresses.py':
    continue
  else:
    file_path = file_name
    print 'adding ' + file_path

    with open(file_path, 'r') as csvfile:
      print "open file"
      csv_reader = csv.DictReader(csvfile, fieldnames=[], restkey='undefined-fieldnames', delimiter=',')

      current_row = 0
      for row in csv_reader:
        current_row += 1
        if current_row == 1:
          csv_reader.fieldnames = row['undefined-fieldnames']
          continue
        address = row
        if current_row % 1000 == 0:
          print "%s addresses indexed" % current_row
        es.index(index='addresses', doc_type='address', id=current_row-1,
                 body={'NUMBER': address[' NUMBER'],
                       'STREET': address[' STREET'],
                       'ADDRESS': address[' NUMBER'] + ' ' + address[' STREET'],
                       'X': address['LON'],
                       'Y': address[' LAT']})
コード例 #40
0
class NmapES:
	"This class will parse an Nmap XML file and send data to Elasticsearch"

	def __init__(self, input_file,es_ip,es_port,index_name):
		self.input_file = input_file
		self.tree = self.__importXML()
		self.root = self.tree.getroot()
		self.es = Elasticsearch([{'host':es_ip,'port':es_port}])
		self.index_name = index_name

	def displayInputFileName(self):
		print(self.input_file)

	def __importXML(self):
		# Parse XML directly from the file path
		return xml.parse(self.input_file)

	def toES(self):
		"Indexes one document per open port for each host in the report"
		for h in self.root.iter('host'):
			dict_item = {}
			dict_item['scanner'] = 'nmap'
			if h.tag == 'host':
				if 'endtime' in h.attrib and h.attrib['endtime']:
					dict_item['time'] = time.strftime('%Y/%m/%d %H:%M:%S', time.gmtime(float(h.attrib['endtime'])))
			
			for c in h:
				if c.tag == 'address':
					if c.attrib['addr'] and c.attrib['addrtype'] == 'ipv4':
						dict_item['ip'] = c.attrib['addr']
					if c.attrib['addr'] and c.attrib['addrtype'] == 'mac':
						dict_item['mac'] = c.attrib['addr']

				elif c.tag == 'hostnames':
					for names in c.getchildren():
						if names.attrib['name']:
							dict_item['hostname'] = names.attrib['name']

				elif c.tag == 'ports':
					for port in c.getchildren():
						dict_item_ports = {}
						if port.tag == 'port':
							# print(port.tag, port.attrib)
							dict_item_ports['port'] = port.attrib['portid']
							dict_item_ports['protocol'] = port.attrib['protocol']
							for p in port.getchildren():
								if p.tag == 'state':
									dict_item_ports['state'] = p.attrib['state']
								elif p.tag == 'service':
									dict_item_ports['service'] = p.attrib['name']
									if 'product' in p.attrib and p.attrib['product']:
										dict_item_ports['product_name'] = p.attrib['product']
										if 'version' in p.attrib and p.attrib['version']:
											dict_item_ports['product_version'] = p.attrib['version']
									if 'banner' in p.attrib and p.attrib['banner']:
										dict_item_ports['banner'] = p.attrib['banner']
								elif p.tag == 'script':
									if p.attrib['id']:
										if p.attrib['output']:
											if 'scripts' in dict_item_ports:
												dict_item_ports['scripts'][p.attrib['id']] = p.attrib['output']
											else:
												dict_item_ports['scripts'] = dict()
												dict_item_ports['scripts'][p.attrib['id']] = p.attrib['output']
													
							to_upload = merge_two_dicts(dict_item, dict_item_ports)
							# Children of <ports> other than <port> (e.g. <extraports>)
							# produce no 'state'; skip them instead of raising KeyError
							if to_upload.get('state') == 'open':
								self.es.index(index=self.index_name, doc_type="vuln", body=json.dumps(to_upload))
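A minimal usage sketch for the class above (the file name, host, port and index name are assumptions):

nmap_es = NmapES('scan.xml', '127.0.0.1', 9200, 'nmap-scans')
nmap_es.toES()  # indexes one 'vuln' document per open port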
コード例 #41
0
es = Elasticsearch()
# es = Elasticsearch([{'host': 'd.es.dataapi.rea-asia.com', 'port': 9200}])

doc = {
    'author': 'Kamal',
    "searched_keyword": {
        "search_keyword": "Sunday spk",
        "matched_places": {
            "Sunday spk …": 90,
            "Sunday spkksjff": 89,
            "XXXXXXXXX": 80
        }
    },
    'timestamp': datetime.now(),
}
res = es.index(index="keyword", doc_type='search_submit', id=1, body=doc)

string_matching = {
    'searchkeyword': 'midvalley',
    'text': 'most relevant search keywords according to db',
    'timestamp': datetime.now(),
    'matched_placekeywords': {
        1: 'mid valley city',
        2: 'mid valley gardens',
        3: 'mid valley gardens'
    }
}
res = es.index(index="midvalley",
               doc_type='keywords-search',
               id=4,
               body=string_matching)
コード例 #42
0
class ElasticsearchStorage(ExtractedInformationStorage):
    """
    Handles remote storage of the meta data in Elasticsearch
    """

    log = None
    cfg = None
    es = None
    index_current = None
    index_archive = None
    mapping = None
    running = False

    def __init__(self):
        self.log = logging.getLogger('elasticsearch.trace')
        self.log.addHandler(logging.NullHandler())
        self.cfg = CrawlerConfig.get_instance()
        self.database = self.cfg.section("Elasticsearch")

        self.es = Elasticsearch(
            [self.database["host"]],
            http_auth=(str(self.database["username"]),
                       str(self.database["secret"])),
            port=self.database["port"],
            use_ssl=self.database["use_ca_certificates"],
            verify_certs=self.database["use_ca_certificates"],
            ca_certs=self.database["ca_cert_path"],
            client_cert=self.database["client_cert_path"],
            client_key=self.database["client_key_path"])
        self.index_current = self.database["index_current"]
        self.index_archive = self.database["index_archive"]
        self.mapping = self.database["mapping"]

        # check connection to Database and set the configuration

        try:
            # check if server is available
            self.es.ping()

            # raise logging level due to indices.exists() habit of logging a warning if an index doesn't exist.
            es_log = logging.getLogger('elasticsearch')
            es_level = es_log.getEffectiveLevel()
            es_log.setLevel('ERROR')

            # check if the necessary indices exist and create them if needed
            if not self.es.indices.exists(self.index_current):
                self.es.indices.create(index=self.index_current,
                                       ignore=[400, 404])
                self.es.indices.put_mapping(index=self.index_current,
                                            body=self.mapping)
            if not self.es.indices.exists(self.index_archive):
                self.es.indices.create(index=self.index_archive,
                                       ignore=[400, 404])
                self.es.indices.put_mapping(index=self.index_archive,
                                            body=self.mapping)
            self.running = True

            # restore previous logging level
            es_log.setLevel(es_level)

        except ConnectionError as error:
            self.running = False
            self.log.error(
                "Failed to connect to Elasticsearch, this module will be deactivated. "
                "Please check if the database is running and the config is correct: %s"
                % error)

    def process_item(self, item, spider):

        if self.running:
            try:
                version = 1
                ancestor = None

                # search for previous version
                request = self.es.search(
                    index=self.index_current,
                    body={'query': {
                        'match': {
                            'url.keyword': item['url']
                        }
                    }})
                if request['hits']['total']['value'] > 0:
                    # save old version into index_archive
                    old_version = request['hits']['hits'][0]
                    old_version['_source']['descendent'] = True
                    self.es.index(index=self.index_archive,
                                  doc_type='_doc',
                                  body=old_version['_source'])
                    version += 1
                    ancestor = old_version['_id']

                # save new version into old id of index_current
                self.log.info("Saving to Elasticsearch: %s" % item['url'])
                extracted_info = ExtractedInformationStorage.extract_relevant_info(
                    item)
                extracted_info['ancestor'] = ancestor
                extracted_info['version'] = version
                self.es.index(index=self.index_current,
                              doc_type='_doc',
                              id=ancestor,
                              body=extracted_info)

            except ConnectionError as error:
                self.running = False
                self.log.error(
                    "Lost connection to Elasticsearch, this module will be deactivated: %s"
                    % error)
        return item
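A hedged sketch of reading a URL's version chain back out of the two indices above, assuming the 'url.keyword' and 'version' fields written by this pipeline and an `es` client:

def get_history(es, index_current, index_archive, url):
    # Current document first, then all archived ancestors, newest first.
    body = {
        'query': {'match': {'url.keyword': url}},
        'sort': [{'version': {'order': 'desc'}}],
    }
    hits = []
    for index in (index_current, index_archive):
        hits.extend(es.search(index=index, body=body)['hits']['hits'])
    return hits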
コード例 #43
0
ファイル: utest.py プロジェクト: zhanglei/esql5
def exec_query(stmt):

    my_lexer = lex(module=lexer, optimize=True, debug=True)
    my_parser = yacc(debug=True, module=parser)

    # Parse the statement that was passed in (the original parsed an
    # undefined name `sql` here)
    val = my_parser.parse(lexer=my_lexer.clone(), debug=False, input=stmt)

    es = Elasticsearch([{'host': '10.68.23.81', 'port': 9201}])

    val.debug()

    if val.get_type() == TK.TOK_QUERY:
        query = Query(val)
        print(query.dsl())
        print(query._index, query._type)

        res = es.search(index=query._index, doc_type=query._type,
                        body=query.dsl(), request_timeout=100)
        stmt_res = response_hits(res)
        print(json.dumps(stmt_res, indent=4))

    elif val.get_type() == TK.TOK_CREATE_TABLE:
        stmt = Create(val)
        res = es.indices.create(index=stmt._index, body=stmt._options,
                                request_timeout=100, ignore=400)
        res = es.indices.put_mapping(index=stmt._index, doc_type=stmt._type,
                                     body=stmt.dsl(), request_timeout=100)
        print(json.dumps(res, indent=4))

    elif val.get_type() == TK.TOK_INSERT_INTO:
        stmt = Insert(val)
        parms = stmt.metas
        res = es.index(index=stmt._index, doc_type=stmt._type,
                       body=stmt.dsl(), **parms)
        print(json.dumps(res, indent=4))

    elif val.get_type() == TK.TOK_BULK_INTO:
        stmt = Bulk(val)
        res = es.bulk(index=stmt._index, doc_type=stmt._type, body=stmt.dsl())
        print(json.dumps(res, indent=4))

    elif val.get_type() == TK.TOK_UPDATE:
        val.debug()
        stmt = Update(val)
        print(json.dumps(stmt.dsl(), indent=4))
        res = es.update(index=stmt._index, doc_type=stmt._type,
                        body=stmt.dsl(), **stmt.conditions)
        print(json.dumps(res, indent=4))

    elif val.get_type() == TK.TOK_UPSERT_INTO:
        val.debug()
        stmt = Upsert(val)
        print(json.dumps(stmt.dsl(), indent=4))
        res = es.update(index=stmt._index, doc_type=stmt._type,
                        body=stmt.dsl(), **stmt.conditions)
        print(json.dumps(res, indent=4))

    elif val.get_type() == TK.TOK_DELETE:
        val.debug()
        stmt = Delete(val)
        res = es.delete(index=stmt._index, doc_type=stmt._type,
                        ignore=404, **stmt.conditions)
        print(json.dumps(res, indent=4))

    elif val.get_type() == TK.TOK_EXPLAIN:
        stmt = Explain(val)
        print(stmt.curl_str)
        print(json.dumps(stmt.dsl(), indent=4))

    elif val.get_type() == TK.TOK_DESC_TABLE:
        stmt = Describe(val)
        res = es.indices.get_mapping(index=stmt._index, doc_type=stmt._type)
        print(res)

    else:
        res = es.cat.indices(index='qs_test*', v=True)
        val.debug()
        print(res)
コード例 #44
0
class ElasticConnector(object):
    def __init__(self):
        ini_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                '../elastic.ini')

        config = configparser.ConfigParser()
        config.read(ini_file)

        conf_url = config.get('env', 'url')
        conf_verify_certs = config.get('env', 'verify_certs').lower() != "false"

        self.es = Elasticsearch(conf_url, verify_certs=conf_verify_certs)
        self.settings = {
            "settings": {
                "index": {
                    "creation_date": "1533116700171",
                    "number_of_shards": "5",
                    "number_of_replicas": "1",
                    "uuid": "3sQuMexES4WE8D5f89INFA",
                    "version": {
                        "created": "6020399"
                    },
                    "provided_name": "autonapt"
                }
            }
        }
        self.mapping = {
            "log": {
                "properties": {
                    "client": {
                        "properties": {
                            "local": {
                                "properties": {
                                    "address": {
                                        "type": "ip",
                                        "fields": {
                                            "keyword": {
                                                "type": "keyword",
                                                "ignore_above": 256
                                            }
                                        }
                                    },
                                    "port": {
                                        "type": "long"
                                    }
                                }
                            },
                            "remote": {
                                "properties": {
                                    "address": {
                                        "type": "ip",
                                        "fields": {
                                            "keyword": {
                                                "type": "keyword",
                                                "ignore_above": 256
                                            }
                                        }
                                    },
                                    "geoip": {
                                        "properties": {
                                            "asn": {
                                                "properties": {
                                                    "asn": {
                                                        "type": "text",
                                                        "fields": {
                                                            "keyword": {
                                                                "type":
                                                                "keyword",
                                                                "ignore_above":
                                                                256
                                                            }
                                                        }
                                                    }
                                                }
                                            },
                                            "city": {
                                                "properties": {
                                                    "divisions": {
                                                        "type": "text",
                                                        "fields": {
                                                            "keyword": {
                                                                "type":
                                                                "keyword",
                                                                "ignore_above":
                                                                256
                                                            }
                                                        }
                                                    },
                                                    "iso_code": {
                                                        "type": "text",
                                                        "fields": {
                                                            "keyword": {
                                                                "type":
                                                                "keyword",
                                                                "ignore_above":
                                                                256
                                                            }
                                                        }
                                                    },
                                                    "location": {
                                                        "type": "geo_point"
                                                    },
                                                    "name": {
                                                        "type": "text",
                                                        "fields": {
                                                            "keyword": {
                                                                "type":
                                                                "keyword",
                                                                "ignore_above":
                                                                256
                                                            }
                                                        }
                                                    },
                                                    "postal_code": {
                                                        "type": "text",
                                                        "fields": {
                                                            "keyword": {
                                                                "type":
                                                                "keyword",
                                                                "ignore_above":
                                                                256
                                                            }
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    },
                                    "port": {
                                        "type": "long"
                                    }
                                }
                            }
                        }
                    },
                    "connection_id": {
                        "type": "long"
                    },
                    "datetime": {
                        "type": "date",
                        "fields": {
                            "keyword": {
                                "type": "keyword",
                                "ignore_above": 256
                            }
                        }
                    },
                    "protocol": {
                        "type": "text",
                        "fields": {
                            "keyword": {
                                "type": "keyword",
                                "ignore_above": 256
                            }
                        }
                    },
                    "server": {
                        "properties": {
                            "local": {
                                "properties": {
                                    "address": {
                                        "type": "text",
                                        "fields": {
                                            "keyword": {
                                                "type": "keyword",
                                                "ignore_above": 256
                                            }
                                        }
                                    },
                                    "port": {
                                        "type": "long"
                                    }
                                }
                            },
                            "remote": {
                                "properties": {
                                    "address": {
                                        "type": "text",
                                        "fields": {
                                            "keyword": {
                                                "type": "keyword",
                                                "ignore_above": 256
                                            }
                                        }
                                    },
                                    "port": {
                                        "type": "long"
                                    }
                                }
                            }
                        }
                    },
                    "type": {
                        "type": "text",
                        "fields": {
                            "keyword": {
                                "type": "keyword",
                                "ignore_above": 256
                            }
                        }
                    }
                }
            }
        }

    def create(self, index_name):
        if self.es.indices.exists(index=index_name):
            return

        #self.es.indices.create(index=index_name, body=self.settings)
        self.es.indices.create(index=index_name)
        self.es.indices.put_mapping(index=index_name,
                                    doc_type='log',
                                    body=self.mapping)

    def store(self, datas):
        tdatetime = dt.now()
        index_name = "autonapt-%s" % (tdatetime.strftime('%Y%m%d'))
        self.create(index_name)

        # 2018-08-01 09:49:53.571078
        tstr = datas['datetime']
        datas['datetime'] = dt.strptime(tstr, '%Y-%m-%d %H:%M:%S.%f')

        return self.es.index(index=index_name, doc_type="log", body=datas)

    def search(self, datas):
        return self.es.search(index="autonapt-*", body=datas)

    def delete(self):
        return self.es.indices.delete(index="autonapt-*")
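store() writes to a fresh autonapt-YYYYMMDD index each day, so create() has to re-apply the mapping every time. A hedged alternative is a (legacy, pre-7.x) index template registered once; a sketch assuming the same `es` client and the `mapping` dict from the class above:

# Every index matching autonapt-* inherits the mapping at creation time,
# making the per-day put_mapping call unnecessary.
es.indices.put_template(name='autonapt', body={
    'index_patterns': ['autonapt-*'],  # use 'template': 'autonapt-*' on ES < 6.0
    'mappings': mapping,               # the dict above, keyed by doc_type 'log'
})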
コード例 #45
0
ファイル: KafkaCon.py プロジェクト: Radhika-Goel/pycharm
for message in consumer:
    print('Running Consumer..')
    print("%s:%d:%d: key=%s value=%s" %
          (message.topic, message.partition, message.offset, message.key,
           message.value))
    # Each Kafka message carries JSON whose 'data' field is itself a JSON
    # string, so both layers are decoded before indexing.
    record = json.loads(message.value)
    new_value = record['data']
    print(new_value)
    es.index(index='monitor', doc_type='_doc', id=i, body=json.loads(new_value))
    i = i + 1

#client = MongoClient('localhost:27017')
#collection = client.numtest.numtest

#for message in consumer:
#	message = message.value
#	collection.insert_one(message)
#	print('{} added to {}'.format(message, collection))
コード例 #46
0
ファイル: es_python3.py プロジェクト: bluflowr/es_kibana
                                   "fields": {
                                       "keyword": {
                                           "type": "keyword",
                                           "ignore_above": 256
                                       }
                                   }
                               },
                               "sibsp": {
                                   "type": "long"
                               },
                               "survived": {
                                   "type": "long"
                               },
                               "ticket": {
                                   "type": "text",
                                   "fields": {
                                       "keyword": {
                                           "type": "keyword",
                                           "ignore_above": 256
                                       }
                                   }
                               }
                           }
                       })

# For a small dataset, you can create each document individually
# Otherwise use es.bulk

for item in to_json:
    es.index(index='titanic', doc_type='people', body=json.dumps(item))
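As the comment above notes, per-document es.index calls get slow for larger datasets. A hedged sketch of the helpers.bulk alternative, reusing the same `es` client and `to_json` list:

from elasticsearch import helpers

# One bulk request instead of len(to_json) round-trips.
actions = ({'_index': 'titanic', '_type': 'people', '_source': item}
           for item in to_json)
helpers.bulk(es, actions)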
コード例 #47
0
import json

var = 1
while var == 1:
    # Initialize Kafka consumer
    consumer = KafkaConsumer(
        bootstrap_servers=
        'ec2-18-188-248-171.us-east-2.compute.amazonaws.com:9092',
        group_id='p3consumer',
        auto_offset_reset='latest')
    consumer.subscribe(['instagram'])

    # Initialize Elasticsearch client
    es = Elasticsearch([{'host': 'localhost', 'port': 9200}])

    # Read incoming messages
    for message in consumer:
        if "Username" in message.value:

            # Parse message text
            content = message.value
            json_string = content.split('(data-HEAP): ')[1]

            stalker_report_json = json.loads(json_string)

            # Push the data to Elasticsearch index
            es.index(index='instagram_report',
                     doc_type='report',
                     id=stalker_report_json['ImageAnalysis']['Id'],
                     body=stalker_report_json)
コード例 #48
0
ファイル: es.py プロジェクト: pytoolkits/storagekit
class ESStorage(LogStorage):
    def __init__(self, config):
        hosts = config.get("HOSTS")
        kwargs = config.get("OTHER", {})
        self.index = config.get("INDEX") or 'jumpserver'
        self.doc_type = config.get("DOC_TYPE") or 'command_store'
        self.es = Elasticsearch(hosts=hosts, **kwargs)

    @staticmethod
    def make_data(command):
        data = dict(user=command["user"],
                    asset=command["asset"],
                    system_user=command["system_user"],
                    input=command["input"],
                    output=command["output"],
                    risk_level=command["risk_level"],
                    session=command["session"],
                    timestamp=command["timestamp"])
        data["date"] = datetime.fromtimestamp(command['timestamp'],
                                              tz=pytz.UTC)
        return data

    def bulk_save(self, command_set, raise_on_error=True):
        actions = []
        for command in command_set:
            data = dict(
                _index=self.index,
                _type=self.doc_type,
                _source=self.make_data(command),
            )
            actions.append(data)
        return bulk(self.es,
                    actions,
                    index=self.index,
                    raise_on_error=raise_on_error)

    def save(self, command):
        """
        保存命令到数据库
        """
        data = self.make_data(command)
        return self.es.index(index=self.index,
                             doc_type=self.doc_type,
                             body=data)

    @staticmethod
    def get_query_body(match=None, exact=None, date_from=None, date_to=None):
        if date_to is None:
            date_to = datetime.now()
        if date_from is None:
            date_from = date_to - timedelta(days=7)

        time_from = date_from.timestamp()
        time_to = date_to.timestamp()

        body = {
            "query": {
                "bool": {
                    "must": [],
                    "must_not": [],
                    "filter": [{
                        "range": {
                            "timestamp": {
                                "gte": time_from,
                                "lte": time_to,
                            }
                        }
                    }]
                }
            },
            "sort": {
                "timestamp": {
                    "order": "desc",
                }
            }
        }
        if match:
            for k, v in match.items():
                # The default organization's org_id is ""
                if k == 'org_id' and v == '':
                    body["query"]["bool"]["must_not"].append(
                        {"wildcard": {
                            k: "*"
                        }})
                    continue
                body["query"]["bool"]["must"].append({"match": {k: v}})
        if exact:
            for k, v in exact.items():
                body["query"]["bool"]["filter"].append({"term": {k: v}})
        return body

    def filter(self,
               date_from=None,
               date_to=None,
               user=None,
               asset=None,
               system_user=None,
               input=None,
               session=None,
               risk_level=None,
               org_id=None):

        match = {}
        exact = {}

        if user:
            exact["user"] = user
        if asset:
            exact["asset"] = asset
        if system_user:
            exact["system_user"] = system_user

        if session:
            match["session"] = session
        if input:
            match["input"] = input
        if org_id is not None:
            match["org_id"] = org_id
        if risk_level is not None:
            match['risk_level'] = risk_level

        body = self.get_query_body(match, exact, date_from, date_to)

        # Get total count (Because default size=10)
        data = self.es.search(index=self.index,
                              doc_type=self.doc_type,
                              body=body,
                              size=0)
        total = data["hits"]["total"]

        data = self.es.search(index=self.index,
                              doc_type=self.doc_type,
                              body=body,
                              size=total)
        return data["hits"]

    def count(self,
              date_from=None,
              date_to=None,
              user=None,
              asset=None,
              system_user=None,
              input=None,
              session=None):
        match = {}
        exact = {}

        if user:
            exact["user"] = user
        if asset:
            exact["asset"] = asset
        if system_user:
            exact["system_user"] = system_user

        if session:
            match["session"] = session
        if input:
            match["input"] = input
        body = self.get_query_body(match, exact, date_from, date_to)
        del body["sort"]
        data = self.es.count(index=self.index, doc_type=self.doc_type, body=body)
        return data["count"]

    def __getattr__(self, item):
        return getattr(self.es, item)

    def all(self):
        """Return all data"""
        raise NotImplementedError("Not supported")

    def ping(self):
        try:
            return self.es.ping()
        except Exception:
            return False
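One caveat in filter() above: issuing a second search with size=total breaks down for very large result sets. A hedged sketch of the same query using helpers.scan, which streams hits through the scroll API instead (names reused from the class above):

from elasticsearch import helpers

def filter_all(storage, body):
    # preserve_order=True keeps the body's sort clause; without it,
    # scan() drops sorting in exchange for a faster scroll.
    return list(helpers.scan(storage.es, query=body,
                             index=storage.index,
                             doc_type=storage.doc_type,
                             preserve_order=True))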
コード例 #49
0
import urllib2
import json
from elasticsearch import Elasticsearch

response = urllib2.urlopen(
    'https://api.douban.com/v2/movie/top250?start=200&count=50')
html = response.read()
top_250_json = json.loads(html)
es = Elasticsearch()
for movie_json in top_250_json['subjects']:
    try:
        movie_url = "https://api.douban.com/v2/movie/" + movie_json['id']
        response = urllib2.urlopen(movie_url, timeout=60)
        html = response.read()
        detail_movie_json = json.loads(html)
        print detail_movie_json
        res = es.index(index="douban",
                       doc_type='movie',
                       id=movie_json['id'],
                       body=detail_movie_json)
    except Exception as e:
        print "failed to index movie %s: %s" % (movie_json['id'], e)
コード例 #50
0
# sensors 400-600 are Parking space sensors
# sensors 600- 800 are Luminosity sensors
# sensors 800-1000 are Garbage sensors
      sensor.value = (random.randint(8,21))         # Generate the values for Smoke in the range from 8 to 21
      t = str(sensor.id).encode()                   # Convert the id to bytes so it can be sent over the TCP connection
      print (sensor.id)                             # Print the sensor id
      s.send(t)                                     # Send the id on the TCP connection
      s.send(b",")                                  # Send a comma to separate id and value
      m = str(sensor.value).encode()                # Convert the sensor value to bytes so it can be sent over the TCP connection
      s.send(m)                                     # Send the value on the TCP connection
      s.send(b",")                                  # Send a comma to separate value and location
      k = str(sensor.location).encode()             # Convert the sensor location to bytes so it can be sent over the TCP connection
      s.send(k)                                     # Send the location on the TCP connection
      s.send(b"\n")                                 # Send a newline so the next set of values starts on a new line
      sensor.id = sensor.id+1                       # Increment the sensor id by 1
      sensor.location = sensor.location+1           # Increment the sensor location by 1
      time.sleep(1)                                 # Wait for 1 second to generate next value
      # Create a document to be sent to elasticsearch
      doc = {
          'sensorid': sensor.id,
          'value': sensor.value,
          'location': sensor.location,
          'timestamp': datetime.datetime.now(),
      }
      res = es.index(index="iot", doc_type='smart_building', body=doc)   # Index the document in elasticsearch
      print(res['created'])                                          # Print if indexed successfully
      if(sensor.id==1001):                          # Reset the sensor id and location after the last sensor (1000)
            sensor.id = 901                         # Reset sensor id
            sensor.location = 901                   # Reset sensor location
s.close()
コード例 #51
0
class ElasticsearchConnector:
    def __init__(self):
        # Initialize parameters and OpenCTI helper
        config_file_path = os.path.dirname(
            os.path.abspath(__file__)) + "/config.yml"
        config = (yaml.load(open(config_file_path), Loader=yaml.FullLoader)
                  if os.path.isfile(config_file_path) else {})
        self.helper = OpenCTIConnectorHelper(config)

        self.elasticsearch_url = get_config_variable("ELASTICSEARCH_URL",
                                                     ["elasticsearch", "url"],
                                                     config)
        self.elasticsearch_ssl_verify = get_config_variable(
            "ELASTICSEARCH_SSL_VERIFY",
            ["elasticsearch", "ssl_verify"],
            config,
            False,
            True,
        )
        self.elasticsearch_login = get_config_variable(
            "ELASTICSEARCH_LOGIN", ["elasticsearch", "login"], config)
        self.elasticsearch_password = get_config_variable(
            "ELASTICSEARCH_PASSWORD", ["elasticsearch", "password"], config)
        self.elasticsearch_index = get_config_variable(
            "ELASTICSEARCH_INDEX", ["elasticsearch", "index"], config)

        if (self.helper.connect_live_stream_id is None
                or self.helper.connect_live_stream_id == "ChangeMe"):
            raise ValueError("Missing Live Stream ID")

        # Initialize the connection to Elastic
        if (self.elasticsearch_login is not None
                and len(self.elasticsearch_login) > 0
                and self.elasticsearch_password is not None
                and len(self.elasticsearch_password) > 0):
            self.elasticsearch = Elasticsearch(
                [self.elasticsearch_url],
                verify_certs=self.elasticsearch_ssl_verify,
                http_auth=(
                    self.elasticsearch_login,
                    self.elasticsearch_password,
                ),
            )
        else:
            self.elasticsearch = Elasticsearch(
                [self.elasticsearch_url],
                verify_certs=self.elasticsearch_ssl_verify,
            )

    def _index(self, payload):
        self.elasticsearch.index(index=self.elasticsearch_index,
                                 id=payload["x_opencti_id"],
                                 body=payload)

    def _delete(self, id):
        self.elasticsearch.delete(index=self.elasticsearch_index, id=id)

    def _process_message(self, msg):
        try:
            data = json.loads(msg.data)["data"]
        except Exception:
            raise ValueError("Cannot process the message: " + str(msg))
        # Handle creation
        if msg.event == "create":
            self.helper.log_info("[CREATE] Processing data {" +
                                 data["x_opencti_id"] + "}")
            return self._index(data)
        # Handle update
        if msg.event == "update":
            self.helper.log_info("[UPDATE] Processing data {" +
                                 data["x_opencti_id"] + "}")
            return self._index(data)
        # Handle delete
        elif msg.event == "delete":
            self.helper.log_info("[DELETE] Processing data {" +
                                 data["x_opencti_id"] + "}")
            return self._delete(data["x_opencti_id"])
        return None

    def start(self):
        self.helper.listen_stream(self._process_message)
コード例 #52
0
host = input()
print("Port:")
puerto = input()

ES_HOST = {"host": host, "port": puerto}
es = Elasticsearch(hosts=[ES_HOST])

print("Index name:")
name_index = input()
print("Doc type:")
tipo = input()
print("ID:")
identificador = input()

print("Data a actualizar:")
print('Ejemplo: {"name":dato}')
data_new = input()
#data_new={"usuario":"desconocido"}

if es.indices.exists(index=name_index):
    resp = es.get(index=name_index, doc_type=tipo, id=identificador)
    print(resp)
    resp = es.index(index=name_index,
                    doc_type=tipo,
                    id=identificador,
                    body=data_new)
    resp_get = es.get(index=name_index, doc_type=tipo, id=identificador)
    print(resp_get)
else:
    print("Index " + name_index + " does not exist")
コード例 #53
0
ファイル: history.py プロジェクト: russelmahmud/connectors
class HistoryConnector:
    def __init__(self):
        config_file_path = os.path.dirname(
            os.path.abspath(__file__)) + "/config.yml"
        config = (yaml.load(open(config_file_path), Loader=yaml.FullLoader)
                  if os.path.isfile(config_file_path) else {})
        self.helper = OpenCTIConnectorHelper(config)
        self.logger_config = self.helper.api.get_logs_worker_config()
        if (self.logger_config["elasticsearch_username"] is not None
                and self.logger_config["elasticsearch_password"] is not None):
            self.elasticsearch = Elasticsearch(
                [self.logger_config["elasticsearch_url"]],
                verify_certs=self.
                logger_config["elasticsearch_ssl_reject_unauthorized"],
                http_auth=(
                    self.logger_config["elasticsearch_username"],
                    self.logger_config["elasticsearch_password"],
                ),
            )
        elif self.logger_config["elasticsearch_api_key"] is not None:
            self.elasticsearch = Elasticsearch(
                [self.logger_config["elasticsearch_url"]],
                verify_certs=self.
                logger_config["elasticsearch_ssl_reject_unauthorized"],
                api_key=self.logger_config["elasticsearch_api_key"],
            )
        else:
            self.elasticsearch = Elasticsearch(
                [self.logger_config["elasticsearch_url"]],
                verify_certs=self.
                logger_config["elasticsearch_ssl_reject_unauthorized"],
            )
        self.elasticsearch_index = self.logger_config["elasticsearch_index"]

    def _process_message(self, msg):
        try:
            event_json = json.loads(msg.data)
            unix_time = round(int(msg.id.split("-")[0]) / 1000)
            event_date = datetime.datetime.fromtimestamp(
                unix_time, datetime.timezone.utc)
            timestamp = event_date.isoformat().replace("+00:00", "Z")
            origin = event_json["origin"] if "origin" in event_json else {}
            history_data = {
                "internal_id":
                msg.id,
                "event_type":
                msg.event,
                "timestamp":
                timestamp,
                "entity_type":
                "history",
                "user_id":
                origin["user_id"] if "user_id" in origin else None,
                "applicant_id":
                origin["applicant_id"] if "applicant_id" in origin else None,
                "context_data": {
                    "id":
                    event_json["data"]["x_opencti_internal_id"]
                    if "x_opencti_internal_id" in event_json["data"] else
                    event_json["data"]["x_opencti_id"],
                    "entity_type":
                    event_json["data"]["type"],
                    "from_id":
                    event_json["data"]["x_opencti_source_ref"]
                    if "x_opencti_source_ref" in event_json["data"] else None,
                    "to_id":
                    event_json["data"]["x_opencti_target_ref"]
                    if "x_opencti_target_ref" in event_json["data"] else None,
                    "message":
                    event_json["message"],
                },
            }
            self.elasticsearch.index(index=self.elasticsearch_index,
                                     id=msg.id,
                                     body=history_data)

        except elasticsearch.RequestError as err:
            print("Unexpected error:", err, msg)
            pass

    def start(self):
        self.helper.listen_stream(self._process_message)
コード例 #54
0
from elasticsearch import Elasticsearch
es = Elasticsearch()
body = {'title': 'elastic search python client integrated'}
s = es.index(index='distance', doc_type='_doc', body=body)
print(s)
#getData=es.get(index='distance', doc_type='_doc', id=7)
#print(getData)
"""
body={'title':'elastic search python client integrated33335'}
res=es.create(index='distance', doc_type='_doc', body=body, id='33335')
print(res)
"""
"""
body={'title':'透過python client產生中文,很多很多中文科科科'}
res=es.create(index='distance', doc_type='_doc', body=body, id='33336')
print(res)
"""
body = {"doc": {'title': 'haha透過python client產生中文,很多很多中文科科科'}}
res = es.update(index='distance', doc_type='_doc', body=body, id='33336')
print(res)
"""
body={"doc":
    {'title':'new 透過python client產生中文,很多很多中文科科科'}}
res=es.create(index='distance', doc_type='_doc', body=body, id='33337')
"""

# search: the request body can carry any Query DSL query
result = es.search(index="distance", body={"query": {"match_all": {}}})
print(result)
コード例 #55
0
from elasticsearch import Elasticsearch
import json
import time

connected = False
while not connected:
    try:
        # Elasticsearch() is lazy and does not connect here, so ping()
        # is what actually verifies that the cluster is reachable.
        es = Elasticsearch(['es'])
        connected = es.ping()
    except Exception:
        pass
    if not connected:
        time.sleep(1)  # avoid a busy loop while the cluster starts up

with open('rebu/fixtures/rebu/rebu_testdata.json') as json_file:
    data = json.load(json_file)
    for obj in data:
        if obj['model'] == 'rebu.meal':
            meal = obj['fields']
            meal['id'] = obj['pk']
            es.index(index='meals_index',
                     doc_type='meal',
                     id=meal['id'],
                     body=meal)
es.indices.refresh(index="meals_index")
print("Successfully loaded fixtures into ES")
コード例 #56
0
    data['subjectUrl'] = subject[0:subject.rindex("/")]
    data['subjectValue'] = subject[subject.rindex("/") + 1:]
    data['predicate'] = predicate
    data['predicateUrl'] = predicate[0:predicate.rindex("/")]
    data['predicateValue'] = predicate[predicate.rindex("/") + 1:]
    data['object'] = object

    if 'graduated_at' in predicate:
        # Keep only the year from the graduation date
        if object is not None:
            data['object'] = object[0:4]

    if 'object_id' in predicate:
        # Resolve the object id to its mapped value where possible
        object = str(object.toPython())
        if map.get(object) is not None:
            data['object'] = map.get(object)
            print data.get('object')

    json_data = json.dumps(data)
    res = es.index(index=index_name, doc_type=index_type, body=json_data)
    if counter % 100000 == 0:
        print counter

es.indices.refresh(index=index_name)
print "Indexing Done"
コード例 #57
0
def find_config(user):
    client = MongoClient()
    db = client.test
    Scursor = db.SearchHistory.find({'user': user})
    #search_hist = []
    for hist in Scursor:
        print hist['history']
        list_word = hist['history']
    print list_word
    #csur= db.LikedPosts.find()
    #for dc in csur:
    #	print dc
    cursor = db.config.find()
    user_id = 0
    for doc in cursor:
        if user == doc['user']:
            break
        user_id = user_id + 1
    #print user_id
    URL = []

    cursor = db.config.find({'user': user})
    for document in cursor:

        for key in document['choice']:
            #print 'dssd'
            doccat = 'doc' + key.lower().replace(' ', '_')
            #print doccat
            URL.append(("http://*****:*****@@@@@@@@@@@@"

    myquery = {
        "query": {
            "multi_match": {
                "query": " ".join(list_word),
                "fields": ["data", "header"]
            }
        },
        "from": 0,
        "size": 100
    }

    es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    result = es.search(index="_all", body=myquery)
    max_score = 0
    for rows in result['hits']['hits']:
        if max_score < rows['_score']:
            max_score = rows['_score']
    for rows in result['hits']['hits']:
        score = (rows['_score'] / max_score) * 0.5
        if len(rows['_source']['scores']) > user_id:
            rows['_source']['scores'][
                user_id] = rows['_source']['scores'][user_id] + score
        else:
            for i in range(user_id):
                rows['_source']['scores'].append(0.15)
            rows['_source']['scores'].append(0.15 + score)
        jsondata = rows['_source']  #json.dumps(dict1, ensure_ascii=False)
        es.index(index='doc' + user.lower() + 'home',
                 doc_type='peopleimg',
                 id=rows['_source']['link'],
                 body=jsondata)

    print URL
    for i in URL:
        myquery = {
            "query": {
                "multi_match": {
                    "query": " ".join(list_word),
                    "fields": ["data", "header"]
                }
            },
            "from": 0,
            "size": 100
        }
        try:
            es = Elasticsearch([{'host': 'localhost', 'port': 9200}])

            result = es.search(index=i[2], body=myquery)
            max_score = 0
            for rows in result['hits']['hits']:
                if max_score < rows['_score']:
                    max_score = rows['_score']
            for rows in result['hits']['hits']:
                #print rows['_source']['header']
                score = (rows['_score'] / max_score) * 0.5

                if len(rows['_source']['scores']) > user_id:
                    rows['_source']['scores'][
                        user_id] = rows['_source']['scores'][user_id] + score

                else:
                    # Use a separate loop variable here: `i` is the URL
                    # tuple from the enclosing loop and is used again below.
                    for j in range(user_id):
                        rows['_source']['scores'].append(0.15)
                    rows['_source']['scores'].append(0.15 + score)
                jsondata = rows[
                    '_source']  #json.dumps(dict1, ensure_ascii=False)

                es.update(index=i[2],
                          doc_type='peopleimg',
                          id=rows['_source']['link'],
                          body={"doc": jsondata})

        except Exception as e:
            pass  #print e
    '''
コード例 #58
0
                            "*_Classes\\\\CLSID\\\\*"
                        }
                    }, {
                        "wildcard": {
                            "registry_key_path.keyword": "*\\\\TreatAs"
                        }
                    }]
                }
            }
        }
    }
}

res = es.search(index="logs-endpoint-winevent-*", body=doc)

count = res['hits']['total']['value']
tactic = "Persistence"
technique = "Component Object Model Hijacking"
procedure = "Component Object Model Hijacking"
tech_code = "T1197"

action = {
    "Tactic": tactic,
    "Technique": technique,
    "Tech_code": tech_code,
    "Procedure": procedure,
    "EventCount": count,
}

es.index(index="represent_5", body=action, id=58)
コード例 #59
0
import requests
import random
from flask import Flask,render_template,request
import json
from elasticsearch import Elasticsearch 
es = Elasticsearch([{'host':'localhost','port':9200}])
app = Flask(__name__)

# SWAPI resources are paged by numeric id; fetch page 1 up front so `res`
# exists before the first status check, then keep going until the API
# returns a non-200 status.
def index_resource(resource):
	i = 1
	res = requests.get('https://swapi.co/api/' + resource + '/' + str(i))
	while res.status_code == 200:
		es.index(index='sw', doc_type=resource, id=i, body=json.loads(res.content))
		i = i + 1
		print(i)
		res = requests.get('https://swapi.co/api/' + resource + '/' + str(i))

# People, planets and starships each get their own doc_type, as before
for resource in ('people', 'planets', 'starships'):
	index_resource(resource)
コード例 #60
-7
def create_index(data):
    # Connect to the Elasticsearch instance
    es = Elasticsearch("http://ec2-52-3-61-194.compute-1.amazonaws.com:9200")

    INDEX_NAME = 'parktest'

    d = {}
    d['time'] = data[0][0]
    d['garage_name'] = data[0][1]
    location = {}
    location['lat'] = data[0][2]
    location['lon'] = data[0][3]
    d['location'] = location
    d['availability'] = data[1]

    # Get the document whose id is the garage name (ignore=404 reports a
    # missing document via res['found'] instead of raising)
    res = es.get(index=INDEX_NAME, doc_type=INDEX_NAME, id=data[0][1], ignore=404)

    # If the document does not exist yet, create it
    if not res['found']:
        es.index(index=INDEX_NAME, doc_type=INDEX_NAME, id=data[0][1], body=d, refresh=True)
    else:
        # Otherwise update only the availability field
        qq = '{"doc": {"availability": ' + str(data[1]) + '}}'
        es.update(index=INDEX_NAME, doc_type=INDEX_NAME, id=data[0][1], body=qq)

    return d
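The get-then-index-or-update sequence above can usually be collapsed into a single call with doc_as_upsert; a hedged sketch reusing the names from the function above (note this merges all of d, not just the availability field):

# One round-trip: update the document if it exists, insert `d` otherwise.
es.update(index=INDEX_NAME, doc_type=INDEX_NAME, id=data[0][1],
          body={'doc': d, 'doc_as_upsert': True})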