Code Example #1
 def calc(self, w1, w2):
     # Count hits for the stemmed bigram in the positive and negative
     # indices; both grams must match ("minimum_should_match": "2").
     w = stemmer.stem(w1) + " " + stemmer.stem(w2)
     q = {
         "query": {
             "match": {
                 "gram": {
                     "query": w,
                     "minimum_should_match": "2"
                 }
             }
         }
     }
     # 0.01 smooths away zero counts before the ratio below
     phraseHitsPos = es.search(index=posindex, body=q,
                               size=0)['hits']['total'] + 0.01
     phraseHitsNeg = es.search(index=negindex, body=q,
                               size=0)['hits']['total'] + 0.01
     if phraseHitsNeg < 2 or phraseHitsPos < 2:
         return 0
     else:
         q = {"query": {"match_all": {}}}
         totalPos = es.search(index=posindex, body=q,
                              size=0)['hits']['total']
         totalNeg = es.search(index=negindex, body=q,
                              size=0)['hits']['total']
         # semantic orientation: log2 ratio of relative frequencies in the
         # positive vs. negative corpus
         SO = math.log(
             (phraseHitsPos * totalNeg) / (phraseHitsNeg * totalPos), 2)
         return SO
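
Note: several examples on this page read res['hits']['total'] as a bare number. That only holds for Elasticsearch 6.x and earlier; 7+ clients return an object like {"value": 42, "relation": "eq"}. A minimal version-tolerant helper, sketched under that one assumption about the response shape:

def total_hits(res):
    """Return the hit count from a search response on any ES version."""
    total = res['hits']['total']
    # ES 7+: {"value": 42, "relation": "eq"}; earlier versions: a plain int
    return total['value'] if isinstance(total, dict) else total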
Code Example #2
    def on_post(self, req, resp):
        cmd = req.get_param('cmd')

        result = {}
        if cmd == 'add':
            book = req.get_param('book')
            file_path = save_file(book)
            task_data = {'path': file_path}
            try:
                add_book_task.delay(task_data)
                result = {'msg': 'file put in queue'}
            except Exception as e:
                result = {'error': str(e)}
                delete_file(file_path)
        elif cmd == 'create':
            result = create_index()
        elif cmd == 'delete':
            result = delete_index()
        elif cmd == 'count':
            result = count_items()
        elif cmd == 'search':
            q = req.get_param('q')
            result = search(q)
        elif cmd == 'search_advanced':
            q = req.get_param('q')
            result = search_advanced(q)

        resp.body = json.dumps(result)
        resp.status = falcon.HTTP_200
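
Note: Falcon (1.3+) can serialize JSON responses itself via resp.media, so the last two lines of the handler above could be written as the following sketch instead of calling json.dumps by hand:

        resp.media = result  # Falcon's default media handler emits JSON
        resp.status = falcon.HTTP_200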
Code Example #3
def putKey(apiKey):
	matchAll = {"query": {"match_all": {}}}
	ssConfig = es.search(esService, matchAll, 'sweet_security', 'configuration')
	if ssConfig is not None:
		for config in ssConfig['hits']['hits']:
			body = {'doc': {'fileCheckKey': apiKey}}
			es.update(esService, body, 'sweet_security', 'configuration', config['_id'])
Code Example #4
File: main.py  Project: FicHub/fichub.net
def get_fixits(q: str) -> List[str]:
    fixits: List[str] = []
    if 'tvtropes.org' in q:
        fixits += [
            '(note that tvtropes.org is not directly supported; instead, use the url of the actual fic)'
        ]
    if not q.startswith(('http://', 'https://')):
        fixits += [
            '(please try a full url including http:// or https:// at the start)'
        ]
    if 'fanfiction.net' in q:
        fixits += [
            'fanfiction.net is fragile at the moment; please try again later or check the discord'
        ]
    if 'fanfiction.net/u/' in q:
        fixits += [
            'user pages on fanfiction.net are not currently supported -- please try a specific story'
        ]
    if 'fictionpress.com' in q:
        fixits += [
            'fictionpress.com is fragile at the moment; please try again later or check the discord'
        ]
    # as a last resort, suggest near-matches from the search index
    try:
        import es
        fis = es.search(q, limit=15)
        for fi in fis:
            fixits += [
                f'<br/>did you mean <a href=/fic/{fi.id}>{fi.title} by {fi.author}</a>?'
            ]
    except Exception:
        pass
    return fixits
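
One hardening note on the suggestion loop above: fi.title and fi.author are interpolated into HTML unescaped. A hedged variant of that loop body using the standard library (everything besides html.escape is unchanged):

import html  # at module level

fixits += [
    f'<br/>did you mean <a href=/fic/{fi.id}>'
    f'{html.escape(fi.title)} by {html.escape(fi.author)}</a>?'
]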
Code Example #5
File: index1.py  Project: yuanzai/bblio2
def search(search_term, result, start_result=0):
    es = Elasticsearch(host)
    q = {
        # request every field that is read from the hits below, not just "text"
        "fields": ["text", "title", "urlAddress"],
        "from": start_result,
        "size": result,
        "query": {
            "query_string": {
                "query": search_term,
            }
        },
        "highlight": {
            "fields": {
                "text": {"fragment_size": 100, "number_of_fragments": 5}
            }
        }
    }
    res = es.search(index=_index, body=q)
    results = []
    for hit in res['hits']['hits']:
        d = {"urlAddress": hit['fields']['urlAddress'],
             "title": str(hit['fields']['title'][3:-2].decode('utf-8')),
             "id": hit['_id'],
             "score": hit['_score']}
        # fall back to the URL when a document has no usable title
        if d['title'] == '':
            d['title'] = d['urlAddress']
        try:
            d.update({"highlight": hit['highlight']['text']})
        except KeyError:
            # not every hit carries a highlight block
            pass

        results.append(d)
    return {'result_list': results,
            'result_count': res['hits']['total']}
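
Recent elasticsearch-py releases deprecate the body= parameter in favor of top-level keyword arguments. A sketch of the same search in 8.x style (same index and parameters assumed; from_ stands in for the reserved word from):

res = es.search(
    index=_index,
    query={"query_string": {"query": search_term}},
    highlight={"fields": {"text": {"fragment_size": 100,
                                   "number_of_fragments": 5}}},
    from_=start_result,
    size=result,
)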
Code Example #6
def search():
    keyword = request.args.get("keyword")
    results = es.search(keyword)
    if "username" in session:
        username = session["username"]
        u = users.find_one(username=username)
        return render_template("search_results.html", results=results, keyword=keyword, user=u)
    return render_template("search_results.html", results=results, keyword=keyword)
Code Example #7
def getKey():
	# Get Configuration Settings
	apiKey = None
	matchAll = {"query": {"match_all": {}}}
	ssConfig = es.search(esService, matchAll, 'sweet_security', 'configuration')
	if ssConfig is not None:
		for config in ssConfig['hits']['hits']:
			if 'fileCheckKey' in config['_source']:
				apiKey = config['_source']['fileCheckKey']
	return apiKey
Code Example #8
File: search_task.py  Project: IsuraNimalasri/bse
def do_task(task_data):
    """
    Search, send the result by email, and run the log task.

    :param task_data: data to log
    :type task_data: dict
    :return: data structure for log task
    :rtype: dict
    """

    results = search(task_data['q'])
    send_email(results, task_data)
    log_task_data = log_results(results, task_data)
    return log_task_data
Code Example #9
def getSSConfig():
    config = {
        'defaultIsolate': 0,
        'defaultLogRetention': 30,
        'defaultMonitor': 0,
        'defaultFW': 0
    }
    matchAll = {"query": {"match_all": {}}}
    ssConfigData = es.search(esService, matchAll, 'sweet_security',
                             'configuration')
    if ssConfigData is not None:
        for entry in ssConfigData['hits']['hits']:
            # the last configuration document found wins over the defaults
            config = entry['_source']
    return config
Code Example #10
def run():
    logger = logging.getLogger('SweetSecurityServerLogger')
    logger.info('Running Baseliner')
    matchAll = {"query": {"match_all": {}}}

    # Create TARDIS index if it is missing
    tardisQuery = es.search(esService, matchAll, 'tardis', 'known_hosts')
    if tardisQuery is None:
        logger.info('Creating TARDIS Index')
        # Same mapping as the original `curl -XPUT localhost:9200/tardis`
        # shell-out, but issued through the client already available as esService.
        keywordField = {"type": "text", "fields": {"raw": {"type": "keyword"}}}
        esService.indices.create(index='tardis', body={
            "mappings": {
                "known_hosts": {
                    "properties": {
                        "mac": keywordField,
                        "destination": keywordField,
                        "port": keywordField,
                    }
                }
            }
        })
    #Get List of Known Devices
    allDevices = es.search(esService, matchAll, 'sweet_security', 'devices')
    if allDevices is not None:
        for host in allDevices['hits']['hits']:
            logger.info("Searching Device %s(%s : %s)" %
                        (host['_source']['nickname'], host['_source']['ip4'],
                         host['_source']['mac']))
            logger.info("    Searching conn.log")
            conn = connSearch(host['_source']['ip4'], host['_source']['mac'])
            logger.info("      Found %d new entries" % conn)

            logger.info("    Searching dns.log")
            dns = dnsSearch(host['_source']['ip4'], host['_source']['mac'])
            logger.info("      Found %d new entries" % dns)

            logger.info("    Searching http.log")
            http = httpSearch(host['_source']['ip4'], host['_source']['mac'])
            logger.info("      Found %d new entries" % http)

            logger.info("    Searching ssl.log")
            ssl = sslSearch(host['_source']['ip4'], host['_source']['mac'])
            logger.info("      Found %d new entries" % ssl)
Code Example #11
def after_crawling():
    # search the Elasticsearch index
    # search_index = input("Enter the Elasticsearch index name to search: ")
    index = es.search("olympic")  # search results from es
    data_list = es.convert_to_list(index)  # keep only the _source values as a list

    # preprocessing: 1. morphological analysis  2. noun extraction  2-1. stopword removal
    # 1. morphological analysis
    # data_preprocessing.m_analysis(data_list)
    # 2. noun extraction
    nouns_list = data_preprocessing.noun_extraction(data_list)
    # 2-1. stopword removal (on the extracted nouns)
    result = data_preprocessing.stopword(nouns_list)

    # store the stopword-filtered result in MySQL
    mysql.nouns_store(result)
Code Example #12
def getLogData():
	files = []
	# the timestamp window is 10m to make sure nothing is missed; files that
	# were already checked are skipped elsewhere
	fileQuery = {"query": {
			"bool": {
				"must": [
					{"match_phrase": {"path": "/opt/nsm/bro/logs/current/files.log"}},
					{"range": {"@timestamp": {"gt": "now-10m"}}}]
				}
			}
		}
	fileData = es.search(esService, fileQuery, 'logstash-*', 'logs', 10000)
	for log in fileData['hits']['hits']:
		files.append(log)
	return files
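
In the Sweet Security examples, es is the project's own wrapper module rather than the official client (hence the positional esService, query, index, doc-type, size arguments). Assuming esService is an ordinary pre-8.x elasticsearch-py client, a rough equivalent of the wrapper call above would be:

fileData = esService.search(index='logstash-*', doc_type='logs',
                            body=fileQuery, size=10000)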
Code Example #13
def get_definition_mdx(word, builder) -> List[bytes]:
    """根据关键字得到MDX词典的解释"""
    if not word:
        return [not_found.encode('utf-8')]
    word = word.lower()
    content = builder.mdx_lookup(word)
    if len(content) < 1:
        word = spellchecker.correction(word)
        content = builder.mdx_lookup(word)
    if len(content) < 1:
        content = builder.mdx_lookup(word.upper())
    if len(content) < 1:
        content = builder.mdx_lookup(plural2singular(word.lower()))
    if is_chinese(word):
        content += search(word)
    if len(content) < 1:
        return [not_found.encode('utf-8')]

    pattern = re.compile(r"@@@LINK=([\w\s]*)")
    rst = pattern.match(content[0])
    if rst is not None:
        link = rst.group(1).strip()
        content = builder.mdx_lookup(link)
    # remove \r\n and entry:/
    str_content = ""
    if len(content) > 0:
        for c in content:
            str_content += c.replace("\r\n", "").replace("entry:/", "")

    injection_html = ''
    try:
        base_path = os.path.dirname(sys.executable)
    except IOError:
        base_path = os.path.abspath(".")

    resource_path = os.path.join(base_path, 'mdx')

    injection = get_all_files(resource_path)

    for p in injection:
        if match_file_ext(p, 'html'):
            injection_html += read_all_lines(p)

    output_html = str_content + injection_html
    return [output_html.encode('utf-8')]
Code Example #14
def dnsSearch(ip, mac):
    numFound = 0
    dnsData = getLogs(ip, '/opt/nsm/bro/logs/current/dns.log')
    knownQueries = []
    knownDnsQuery = {"query": {"match_phrase": {"mac": {"query": mac}}}}
    knownDnsData = es.search(esService, knownDnsQuery, 'tardis',
                             'known_dnsqueries')
    for query in knownDnsData['hits']['hits']:
        if query['_source']['query'] not in knownQueries:
            knownQueries.append(query['_source']['query'])
    for log in dnsData['hits']['hits']:
        if log['_source']['query'] not in knownQueries:
            numFound += 1
            knownQueries.append(log['_source']['query'])
            # record the new query in the baseline index (fresh name so the
            # dnsData being iterated is not shadowed)
            newQueryDoc = {'mac': mac, 'query': log['_source']['query']}
            es.write(esService, newQueryDoc, 'tardis', 'known_dnsqueries')
            alertMessage = 'A new DNS query was added to the baseline: %s' % log[
                '_source']['query']
            alert.send('Baseliner', alertMessage, log['_id'], log['_index'])
    return numFound
Code Example #15
def connSearch(ip, mac):
    numFound = 0
    connData = getLogs(ip, '/opt/nsm/bro/logs/current/conn.log')
    knownHosts = []
    knownHostQuery = {"query": {"match_phrase": {"mac": {"query": mac}}}}
    knownHostData = es.search(esService, knownHostQuery, 'tardis',
                              'known_hosts')
    for device in knownHostData['hits']['hits']:
        if device['_source']['ip'] not in knownHosts:
            knownHosts.append(device['_source']['ip'])

    for log in connData['hits']['hits']:
        if log['_source']['resp_h'] not in knownHosts:
            numFound += 1
            knownHosts.append(log['_source']['resp_h'])
            hostData = {'mac': mac, 'ip': log['_source']['resp_h']}
            es.write(esService, hostData, 'tardis', 'known_hosts')
            alertMessage = 'A new IP was added to the baseline: %s' % log[
                '_source']['resp_h']
            alert.send('Baseliner', alertMessage, log['_id'], log['_index'])
    return numFound
Code Example #16
def deleteOldLogs():
    logger = logging.getLogger('SweetSecurityServerLogger')
    logger.info('Checking local disk space')
    diskUsage = checkDisk()
    #Warn user if disk storage is above 85%
    if diskUsage > 84:
        message = 'Server disk usage is at %d%%' % diskUsage
        response = alert.send('Disk Check', message, None, None)
    logger.info('Cleaning up logs')
    ssConfig = getSSConfig()
    defaultLogRetention = ssConfig['defaultLogRetention']
    if defaultLogRetention == 0:
        logger.info('System configured to never delete logs')
        return 'Logs configured to never delete'
    else:
        logger.info('System is configured to delete logs older than %d days' %
                    defaultLogRetention)
    matchAll = {"query": {"match_all": {}}}
    logsDeleted = 0
    today = datetime.datetime.now()
    indices = []
    for index in esService.indices.get('logstash-*'):
        indices.append(index)
    logger.info("There are %d days worth of logs" % len(indices))
    indices = sorted(indices)
    for index in indices:
        indexData = es.search(esService, matchAll, index, 'logs')
        logCount = indexData['hits']['total']
        indexDate = datetime.datetime.strptime(index[-10:], "%Y.%m.%d")
        indexDaysOld = today - indexDate
        indexDaysOld = indexDaysOld.days
        logger.info("%s is %d days old and has %d logs" %
                    (index, indexDaysOld, logCount))
        if indexDaysOld > defaultLogRetention:
            logger.info("Deleting index %s" % index)
            # NOTE: the delete call is commented out, so this run only counts
            # what would be removed.
            #esService.indices.delete(index=index)
            logsDeleted += logCount
    logger.info("Deleted %d logs" % logsDeleted)
    return "Deleted %d logs" % logsDeleted
Code Example #17
def httpSearch(ip, mac):
    numFound = 0
    httpData = getLogs(ip, '/opt/nsm/bro/logs/current/http.log')
    knownWebsites = []
    knownHostQuery = {"query": {"match_phrase": {"mac": {"query": mac}}}}
    knownHostData = es.search(esService, knownHostQuery, 'tardis',
                              'known_websites')
    for url in knownHostData['hits']['hits']:
        if url['_source']['server_name'] not in knownWebsites:
            knownWebsites.append(url['_source']['server_name'])
    for log in httpData['hits']['hits']:
        if log['_source']['server_name'] not in knownWebsites:
            numFound += 1
            knownWebsites.append(log['_source']['server_name'])
            hostData = {
                'mac': mac,
                'server_name': log['_source']['server_name']
            }
            es.write(esService, hostData, 'tardis', 'known_websites')
            alertMessage = 'A new website was added to the baseline: %s' % log[
                '_source']['server_name']
            alert.send('Baseliner', alertMessage, log['_id'], log['_index'])
    return numFound
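
connSearch, dnsSearch, and httpSearch differ only in which log they read, which field they track, and the alert text. A hedged refactor sketch that factors out the shared loop (baselineSearch and its parameters are illustrative names; es, alert, and getLogs are the same wrappers used above):

def baselineSearch(ip, mac, logPath, logField, docField, docType, alertText):
    """Generic form of the conn/dns/http baseline loops."""
    numFound = 0
    logData = getLogs(ip, logPath)
    knownQuery = {"query": {"match_phrase": {"mac": {"query": mac}}}}
    knownData = es.search(esService, knownQuery, 'tardis', docType)
    # values already baselined for this device
    known = {hit['_source'][docField] for hit in knownData['hits']['hits']}
    for log in logData['hits']['hits']:
        value = log['_source'][logField]
        if value not in known:
            numFound += 1
            known.add(value)
            es.write(esService, {'mac': mac, docField: value}, 'tardis', docType)
            alert.send('Baseliner', alertText % value, log['_id'], log['_index'])
    return numFound

Under that sketch, dnsSearch(ip, mac) becomes baselineSearch(ip, mac, '/opt/nsm/bro/logs/current/dns.log', 'query', 'query', 'known_dnsqueries', 'A new DNS query was added to the baseline: %s').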
Code Example #18
def getLogs(ip, log):
    logQuery = {
        "query": {
            "bool": {
                "must": [{
                    "match": {
                        "orig_h": ip
                    }
                }, {
                    "match_phrase": {
                        "path": log
                    }
                }, {
                    "range": {
                        "@timestamp": {
                            "gt": "now-1h"
                        }
                    }
                }]
            }
        }
    }
    logData = es.search(esService, logQuery, 'logstash-*', 'logs')
    return logData
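
For comparison, the same bool query can be composed with the elasticsearch-dsl package instead of a raw dict. A sketch (assuming esService is the underlying elasticsearch-py client; note execute() returns a Response object, not the raw dict the wrapper returns):

from elasticsearch_dsl import Search, Q

def getLogsDsl(ip, log):
    # & between Q objects combines them into a bool/must query
    s = Search(using=esService, index='logstash-*').query(
        Q('match', orig_h=ip) &
        Q('match_phrase', path=log) &
        Q('range', **{'@timestamp': {'gt': 'now-1h'}})
    )
    return s.execute()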
Code Example #19
File: server.py  Project: temy13/edinet_search
def search(query,
           offset=0,
           length=300,
           t_from="",
           t_to="",
           titles=None,
           title_indexes=None):
    # None defaults avoid the shared-mutable-default-argument pitfall
    titles = titles if titles is not None else []
    title_indexes = title_indexes if title_indexes is not None else []
    #titles = title_filter(titles)
    #title_indexes.extend()
    #if not title_indexes:
    #  title_indexes = [0,1,33,74]
    count, data = es.search(query,
                            offset=offset,
                            t_from=t_from,
                            t_to=t_to,
                            title_indexes=title_indexes)
    rdata = []
    for d in data:
        dx = {}
        content = d["value"]
        soup = BeautifulSoup(content, "lxml")
        content = soup.getText()
        content = content.replace("\n", "").replace("\u3000", "")
        pos = content.find(query)
        s = pos - length if pos > length else 0
        dx["value"] = content[s:s + (length * 2)]
        if len(content) > (length * 2):
            dx["value"] += "..."

        dx["publisher"] = d["publisher"]
        dx["term"] = d["term"]
        dx["term_from"] = dt_convert(d["term_from"])
        dx["term_to"] = dt_convert(d["term_to"])
        rdata.append(dx)

    return count, rdata
Code Example #20
def main():

    # run the crawler
    article_data = crawler.crawling()

    # store the raw crawled articles in Elasticsearch
    store_index = input("Enter the Elasticsearch index name to store into: ")
    es.store(store_index, article_data)

    # search the Elasticsearch index
    search_index = input("Enter the Elasticsearch index name to search: ")
    index = es.search(search_index)  # search results from es
    data_list = es.convert_to_list(index)  # keep only the _source values as a list

    # preprocessing: 1. morphological analysis  2. noun extraction  2-1. stopword removal
    # 1. morphological analysis
    # data_preprocessing.m_analysis(data_list)
    # 2. noun extraction
    nouns_list = data_preprocessing.noun_extraction(data_list)
    # 2-1. stopword removal (on the extracted nouns)
    result = data_preprocessing.stopword(nouns_list)

    # store the stopword-filtered result in MySQL
    mysql.nouns_store(result)

    # compute tf
    words = mysql.search_in_dataResult()  # fetch only the noun column needed for tf
    df_tf = tfidf.cal_tf(words)  # compute tf values
    mysql.store_tf_value(df_tf)  # store the tf dataframe (id, noun, count)

    # TF-IDF vector - sklearn
    # corpus = tfidf.make_list_for_tfidf(words)
    # tfidf.cal_vector(corpus)

    # ngram - run the top-word related-keyword function
    realted_keyword()
Code Example #21
File: basemodel.py  Project: pythononwheels/pow_devel
 def find(self, body):
     """ Find something given a query or criterion """
     res = es.search(index=dbname, body=body)
     return res
Code Example #22
def search():
    query = request.json.get('query')
    facets = request.json.get('facets')

    return jsonify(es.search(query, facets))
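
request.json raises when the body is missing or is not JSON. A slightly more defensive sketch of the same handler (es.search as above):

def search():
    data = request.get_json(silent=True) or {}  # tolerate missing/non-JSON bodies
    return jsonify(es.search(data.get('query'), data.get('facets')))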