# (snippet begins mid-statement: the enclosing "with open(<path>, 'a') as file_obj:" is truncated)
        'a') as file_obj:
    for key in new_title_hash.keys():
        title_parts = key.split("\n")
        title_str = ""
        for i in title_parts:
            title_str = title_str + i + " "
        file_obj.write(title_str[:-1] + "|" + str(new_title_hash[key]) + "\n")
print len(new_title_hash), " new article(s) added"

new_title_hash = {}
title_hash = {}

es = Elasticsearch(['http://localhost:9200'])
res = es.search(index="arxiv_feed", doc_type="feed", size=2000)
res = res['hits']['hits']

for entry in res:
    # this part of the code removes any newline characters that might be present
    # inside the title (as it appears in the feed) and replaces them with spaces
    entry_id = entry['_id']
    entry = entry['_source']
    print entry_id
    print entry.keys()
    title_parts = entry['title'].encode('utf-8').split("\n")
    title_str = ""
    for i in title_parts:
        title_str = title_str + i + " "
    title_str = title_str[:-1]
    print('Benchmark file: {0}'.format(args.csv_file))
    print('ElasticSearch URL: {0}'.format(args.url))
    return args


def build_index_name(index_prefix: str, d: datetime.date) -> str:
    today = d.strftime('%Y-%m-%d')
    index = '{0}-{1}'.format(index_prefix, today)
    print('Index: {0}'.format(index))
    return index


if __name__ == "__main__":
    cmd_args = parse_args(sys.argv[1:])
    index_name = build_index_name(cmd_args.index_prefix, datetime.utcnow().date())
    es = Elasticsearch(
        hosts=[cmd_args.url],
        http_auth=(cmd_args.username, cmd_args.password) if cmd_args.username else None,
        use_ssl=cmd_args.use_ssl,
        verify_certs=cmd_args.verify_certs)
    processed = process_data(
        cmd_args.csv_file,
        index=index_name,
        process_func=lambda actions: helpers.bulk(es, actions))
    print('Indexed {0} benchmarks'.format(processed))
#!/usr/bin/python
# -*- coding: utf-8 -*-
from elasticsearch import Elasticsearch, client
from FMStats import confManager as cm
import json

# Configuration
conf = cm.ConfManager()
es = Elasticsearch([conf.elasticAddres()])
ec = client.IndicesClient(es)


def checkIndex(esIndex):
    return ec.exists(index=esIndex)


def createIndex(esIndex, esMapping):
    with open(esMapping, 'r') as mapping_file:
        mapping = mapping_file.read()
    ec.create(index=esIndex, body=mapping)


def mappingInit():
    if not checkIndex(conf.artistIndex()):
        createIndex(conf.artistIndex(), conf.artistMapping())
    if not checkIndex(conf.radioIndex()):
        createIndex(conf.radioIndex(), conf.radioMapping())
    return True  # All indices initialized.
#!/usr/bin/env python3
import time

from elasticsearch import Elasticsearch

client = Elasticsearch()

# Block until the cluster answers a ping (e.g. while waiting for a container to start).
ready = False
try:
    while not ready:
        ready = client.ping()
        if not ready:
            time.sleep(1)  # avoid busy-waiting between pings
except KeyboardInterrupt:
    pass
#!/usr/bin/env python
'''
Licensed to Elasticsearch B.V under one or more agreements.
Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
See the LICENSE file in the project root for more information
'''

from elasticsearch import Elasticsearch

es = Elasticsearch()

print("bb143628fd04070683eeeadc9406d9cc - L:11")
# tag::bb143628fd04070683eeeadc9406d9cc[]
response = es.index(index='twitter', id=1, body={
    'user': '******',
    'post_date': '2009-11-15T14:12:12',
    'message': 'trying out Elasticsearch',
})
# end::bb143628fd04070683eeeadc9406d9cc[]
print("---------------------------------------")
print(response)
print("---------------------------------------")

print("804a97ff4d0613e6568e4efb19c52021 - L:77")
print("TODO")

print("d718b63cf1b6591a1d59a0cf4fd995eb - L:121")
# tag::d718b63cf1b6591a1d59a0cf4fd995eb[]
response = es.index(
def copy_es_index(
        source_index=None,
        target_index=None,
        create_target_index=True,
        refresh=True,
        wait_for_completion=True,
        add_copied_from=None):
    '''
    Method to duplicate one ES index to another

    Args:
        source_index (str): Source ES index to copy from
        target_index (str): Target ES index to copy to
        create_target_index (boolean): If True, check for target and create

    Returns:
        (dict): results of reindex via elasticsearch client reindex request
    '''

    # get ES handle
    es_handle_temp = Elasticsearch(hosts=[settings.ES_HOST])

    # put/confirm combine es index templates
    template_body = {
        'template': '*',
        'settings': {
            'number_of_shards': 1,
            'number_of_replicas': 0,
            'refresh_interval': -1
        },
        'mappings': {
            'record': {
                'date_detection': False,
                'properties': {
                    'combine_db_id': {
                        'type': 'integer'
                    }
                }
            }
        }
    }
    es_handle_temp.indices.put_template('combine_template', body=json.dumps(template_body))

    # if creating target index, check whether the target index exists
    if create_target_index and not es_handle_temp.indices.exists(target_index):
        es_handle_temp.indices.create(target_index)

    # prepare reindex query
    dupe_dict = {
        'source': {
            'index': source_index,
            'query': {}
        },
        'dest': {
            'index': target_index
        }
    }

    # if add_copied_from, include in reindexed document
    if add_copied_from:
        dupe_dict['script'] = {
            'inline': 'ctx._source.source_job_id = %s' % add_copied_from,
            'lang': 'painless'
        }

    # reindex using elasticsearch client
    reindex = es_handle_temp.reindex(
        body=dupe_dict,
        wait_for_completion=wait_for_completion,
        refresh=refresh)

    return reindex
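# A minimal usage sketch for copy_es_index above, assuming settings.ES_HOST is
# configured and the cluster is reachable; the index names and job id here are
# hypothetical, not taken from the original code.
result = copy_es_index(
    source_index='j42',          # assumed source index name
    target_index='j42_copy',     # assumed target index name
    add_copied_from=42)          # written into ctx._source.source_job_id
print(result)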
from datetime import datetime
from elasticsearch import Elasticsearch

es = Elasticsearch()
# es = Elasticsearch([{'host': 'd.es.dataapi.rea-asia.com', 'port': 9200}])

doc = {
    'author': 'Kamal',
    "searched_keyword": {
        "search_keyword": "Sunday spk",
        "matched_places": {
            "Sunday spk …": 90,
            "Sunday spkksjff": 89,
            "XXXXXXXXX": 80
        }
    },
    'timestamp': datetime.now(),
}
res = es.index(index="keyword", doc_type='search_submit', id=1, body=doc)

string_matching = {
    'searchkeyword': 'midvalley',
    'text': 'most relevant search keywords according to db',
    'timestamp': datetime.now(),
    'matched_placekeywords': {
        1: 'mid valley city',
        2: 'mid valley gardens',
        3: 'mid valley gardens'
    }
}
res = es.index(index="midvalley",
import ast
import math
import re
import time

from tqdm import tqdm

from index.query_processing import get_query_words, get_query_vectors
from index.merge_index import get_vocab_size
from index.create_inverted_docs import create_index
from index.constants import *
from index.proximity_search import *
from index.merge_index import read_data
from elasticsearch import Elasticsearch

ES = Elasticsearch()


def generate_result_matrix(results_dic, query_no, doc_no, score_query):
    if query_no not in results_dic.keys():
        results_dic[query_no] = {doc_no: score_query}
    else:
        if doc_no not in results_dic[query_no]:
            results_dic[query_no][doc_no] = score_query
    return results_dic


def write_result(result_dic, filename):
    with open(filename, 'w') as f:
        for query_no in result_dic.keys():
            rank = 1
from bert_serving.client import BertClient
from elasticsearch import Elasticsearch
from laserembeddings import Laser
import sys

__author__ = "Bijin Benny"
__email__ = "*****@*****.**"
__license__ = "MIT"
__version__ = "1.0"

# Client connection to local BERT server
bc = BertClient(ip='localhost', output_fmt='list')
# Instance of the LASER language model
laser = Laser()
# Elasticsearch DB client
client = Elasticsearch(hosts="http://*****:*****@localhost:9200/")

"""
createScript builds custom database queries based on the search type.
The search types include basic TF-IDF term search, and LASER and BERT
vector similarity searches. The function returns a unique query for each
of the scenarios.
Arguments:
    query        : Text form of the query
    search_type  : Type of search, i.e. term, laser or bert
    query_vector : Vector form of the query for cosine similarity
"""
def createScript(query, search_type, query_vector):
    if (search_type == 'term'):
        return {
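# The snippet above is cut off before the query bodies are returned. Below is a
# minimal sketch of what the vector-similarity branches could look like, assuming
# an ES 7.x index with a dense_vector field; the field name 'text_vector' and the
# helper name are assumptions, not part of the original code.
def vector_query_sketch(query_vector):
    # script_score with cosineSimilarity is the standard ES 7.x dense-vector pattern
    return {
        "query": {
            "script_score": {
                "query": {"match_all": {}},
                "script": {
                    "source": "cosineSimilarity(params.query_vector, 'text_vector') + 1.0",
                    "params": {"query_vector": query_vector}
                }
            }
        }
    }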
def create_app(config_class=Config):
    app = Flask(__name__)
    app.config.from_object(config_class)
    db.init_app(app)
    migrate.init_app(app, db)
    login.init_app(app)
    mail.init_app(app)
    bootstrap.init_app(app)
    moment.init_app(app)
    babel.init_app(app)
    app.elasticsearch = Elasticsearch([app.config['ELASTICSEARCH_URL']]) \
        if app.config['ELASTICSEARCH_URL'] else None
    app.redis = Redis.from_url(app.config['REDIS_URL'])
    app.task_queue = rq.Queue('microblog-tasks', connection=app.redis)

    from app.errors import bp as errors_bp
    app.register_blueprint(errors_bp)

    from app.auth import bp as auth_bp
    app.register_blueprint(auth_bp, url_prefix='/auth')

    from app.main import bp as main_bp
    app.register_blueprint(main_bp)

    if not app.debug and not app.testing:
        if app.config['MAIL_SERVER']:
            auth = None
            if app.config['MAIL_USERNAME'] or app.config['MAIL_PASSWORD']:
                auth = (app.config['MAIL_USERNAME'], app.config['MAIL_PASSWORD'])
            secure = None
            if app.config['MAIL_USE_TLS']:
                secure = ()
            mail_handler = SMTPHandler(
                mailhost=(app.config['MAIL_SERVER'], app.config['MAIL_PORT']),
                fromaddr='no-reply@' + app.config['MAIL_SERVER'],
                toaddrs=app.config['ADMINS'],
                subject='Microblog Failure',
                credentials=auth,
                secure=secure)
            mail_handler.setLevel(logging.ERROR)
            app.logger.addHandler(mail_handler)

        if app.config['LOG_TO_STDOUT']:
            stream_handler = logging.StreamHandler()
            stream_handler.setLevel(logging.INFO)
            app.logger.addHandler(stream_handler)
        else:
            if not os.path.exists('logs'):
                os.mkdir('logs')
            file_handler = RotatingFileHandler('logs/microblog.log',
                                               maxBytes=10240, backupCount=10)
            file_handler.setFormatter(
                logging.Formatter('%(asctime)s %(levelname)s: %(message)s '
                                  '[in %(pathname)s:%(lineno)d]'))
            file_handler.setLevel(logging.INFO)
            app.logger.addHandler(file_handler)

        app.logger.setLevel(logging.INFO)
        app.logger.info('Microblog startup')

    return app
"receipts_root_hash": "11111111111111111111111111111111", "pub_key": "GZsJqUVM3QHVANAb2U9TGGoawjn6Tn2Wipzdeuzy1CcYjfFxuq" }, "hash": "FGxUBNkjAzQQ6GYcMbzWvmUHovuvcXiUv8P34XkyryVV" }, "node_id": "d4b2fc83-976a-4eef-a342-6dc87f87afe8" } return doc from elasticsearch import Elasticsearch from elasticsearch import helpers # 일레스틱서치 IP주소와 포트(기본:9200)로 연결한다 es = Elasticsearch("http://13.209.67.143:9200/") # 환경에 맞게 바꿀 것 es.info() # 인덱스는 독립된 파일 집합으로 관리되는 데이터 덩어리이다 def makeIndex(es, index_name): """인덱스를 신규 생성한다(존재하면 삭제 후 생성) """ print(f"make index for {index_name}") if es.indices.exists(index=index_name): es.indices.delete(index=index_name) print(es.indices.create(index=index_name)) def genBulkEntry(newdoc, indexName, indexType): #body = json.dumps(doc)
import sys

from elasticsearch import Elasticsearch
from utils.kafkahelper import KafkaConnection

PORT = 9200
INDEX_NAME = "data"

host = "localhost:%s" % PORT
es = Elasticsearch([host])


def get_raw_data(query):
    items = []
    offset = 0
    limit = 100  # By default, the limit is 10
    while True:
        response = es.search(index=INDEX_NAME, q=query, size=limit, from_=offset)
        data = response['hits']['hits']
        if len(data) == 0:
            break
        items += data
        offset += limit
    return items


def push_data(data):
    conn = KafkaConnection()
    for item in data:
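# Note: from/size pagination as in get_raw_data is capped by index.max_result_window
# (10,000 hits by default). A sketch of the same retrieval via the scroll helper,
# which avoids that ceiling; the function name is an assumption, the constants are
# the ones defined above.
from elasticsearch import helpers

def get_raw_data_scan(query):
    # helpers.scan streams all matching hits using the scroll API
    return list(helpers.scan(es, index=INDEX_NAME, q=query, size=100))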
def __init__(self, hosts, port, timeout, **kwargs):
    self.conn = Elasticsearch(hosts=hosts, port=port, **kwargs)
    self.timeout = timeout
from faker import Factory
from datetime import datetime
from elasticsearch import Elasticsearch
import json

esDomainEndpoint = "http://search-endpoint:80"
es = Elasticsearch(esDomainEndpoint)


def create_names(fake):
    for x in range(100):
        genUname = fake.slug()
        genName = fake.name()
        genJob = fake.job()
        genCountry = fake.country()
        genText = fake.text()
        genProfile = fake.profile()
        go = es.index(index="profiles",
                      doc_type="users",
                      id=genUname,
                      body={
                          "name": genName,
                          "job": genJob,
                          "country": genCountry,
                          "notes": genText,
                          "profile_details": genProfile,
                          "timestamp": datetime.now()
                      })
        print(json.dumps(go))
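# A minimal driver for create_names above; Factory.create() is the legacy Faker
# entry point matching the "from faker import Factory" import already present.
if __name__ == "__main__":
    fake = Factory.create()
    create_names(fake)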
def api_search(request):
    query = request.GET.get("term", None)
    if query is None:
        return HttpResponse("[]")

    # For user experiment, run search version 1 or 2, 2 being more feature
    # rich and having parsed filters. See atlas-data#32
    search_version = int(request.GET.get("search_var", 0))

    # Parse search query
    query, query_type, kwargs = parse_search(
        query, strip_keywords=(search_version != 1))

    # Resolve any synonyms. feasibility -> pie_scatter etc.
    if "app_name" in kwargs:
        given_app_name = kwargs["app_name"][0]
        kwargs["app_name"] = [
            APP_NAME_SYNONYMS.get(given_app_name, given_app_name)
        ]

    # Viz params are not an elasticsearch filter so pop that off
    viz_params = kwargs.pop("viz_params", None)

    # Prepare elasticsearch filters
    if search_version == 2 or search_version == 0:
        filters = prepare_filters(kwargs)
    else:
        filters = {}

    es_query = {"query": {"filtered": {}}, "size": 8}

    # Add filters to the query if they were given. Filters are ANDed.
    if len(filters) > 0:
        es_filters = [{
            "terms": {
                k: [x.lower() for x in v]
            }
        } for k, v in filters.iteritems()]
        es_filters = {"bool": {"must": es_filters}}
        es_query["query"]["filtered"]["filter"] = es_filters

    # Add fuzzy search for query string if any non-filter query string remains
    # after taking out the filters
    if query.strip() != "":
        es_query["query"]["filtered"]["query"] = {
            "fuzzy_like_this": {
                "like_text": query,
                "fields": ["title"],
                "max_query_terms": 15,
                "prefix_length": 3
            }
        }

    # Do the query
    es = Elasticsearch()
    result = es.search(index="questions", body=es_query)

    # Format the results in a way that complies with the OpenSearch standard's
    # suggestion extension
    labels = []
    urls = []
    for x in result['hits']['hits']:

        data = x['_source']

        # Regenerate title and url so we can add stuff into it dynamically,
        # like the year being searched for, or forcing an app.
        years = kwargs.get('years', None)

        # Possible apps this title could be visualized as
        app_names = data['app_name']

        # If the app the user requested is possible, use that. Otherwise, use
        # the first one as default. App names in the elasticsearch index are
        # sorted in a certain way for this to make sense so check out the
        # indexer script
        requested_app_name = filters.get("app_name", [None])[0]
        if requested_app_name in app_names:
            app_name = requested_app_name
        else:
            app_name = app_names[0]

        if years and len(years) == 2:
            if app_name in ["map", "tree_map"]:
                # If multiple years are specified and we can do a stacked
                # graph, do a stacked graph instead of a treemap or map
                app_name = "stacked"
            elif app_name in ["product_space", "pie_scatter"]:
                # Some apps can never have multiple years so just use the first
                # one specified
                years = [years[0]]

        # If no years specified, use default years
        if years is None:
            if app_name == "stacked":
                years = [1995, 2012]
            else:
                years = [2012]

        # You can't show a product space based on imports so ignore those
        if app_name == "product_space" and data["trade_flow"] == "import":
            continue

        title = get_title(api_name=data['api_name'],
                          app_name=app_name,
                          country_names=data.get('country_names', None),
                          trade_flow=data['trade_flow'],
                          years=years,
                          product_name=data.get('product_name', None))
        url = params_to_url(api_name=data['api_name'],
                            app_name=app_name,
                            country_codes=data.get('country_codes', None),
                            trade_flow=data['trade_flow'],
                            years=years,
                            product_code=data.get('product_code', None))

        if viz_params:
            if app_name == "pie_scatter":
                url += "?queryActivated=True"
                url += "&yaxis=%s" % viz_params[0]

        labels.append(title)
        urls.append(settings.HTTP_HOST + url)

    return HttpResponse(json.dumps([query, labels, [], urls]))
from flask import Blueprint
from elasticsearch import Elasticsearch

from api.models import db, models
from api.core import create_response, KEYWORDS, get_database_url
from api.auth import auth

_elasticsearch = Blueprint("_elasticsearch", __name__)
_es_url = get_database_url()["elasticsearch"]

_es = None
if "https" in _es_url:
    import certifi

    _es = Elasticsearch(_es_url, use_ssl=True, ca_certs=certifi.where())
else:
    _es = Elasticsearch(_es_url)


def _generate_body(query):
    return {
        "query": {
            "multi_match": {
                "query": query,
                "type": "bool_prefix",
                "fields": ["name", "name._2gram", "name._3gram"],
            }
        }
    }
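# A hedged usage sketch for the body builder above: run the search-as-you-type
# query and return the matching documents. The "users" index name and the helper
# name are assumptions, not part of the original snippet.
def search_names_sketch(query):
    response = _es.search(index="users", body=_generate_body(query))
    return [hit["_source"] for hit in response["hits"]["hits"]]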
def setup_method(self, method):
    elastic = Elasticsearch(transport_class=DummyTransport)
    self.client = LtrClient(elastic)
import argparse
import logging
import os

from elasticsearch import Elasticsearch

parser = argparse.ArgumentParser(description="configures elastic")
parser.add_argument(
    "--task", "-t", default="setup", choices=["setup", "delete"]
)
args = parser.parse_args()

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

es_user = os.getenv("ELASTIC_USER")
if es_user is not None:
    es_pass = os.getenv("ELASTIC_PASS")
    es = Elasticsearch([os.getenv("ELASTICSEARCH_URL", "elasticsearch:9200")],
                       http_auth=(es_user, es_pass))
else:
    es = Elasticsearch([os.getenv("ELASTICSEARCH_URL", "elasticsearch:9200")])

settings = {
    "settings": {
        "analysis": {
            "filter": {
                "brazilian_stop": {
                    "type": "stop",
                    "stopwords": "_brazilian_"
                },
                "brazilian_keywords": {
                    "type": "keyword_marker",
                    "keywords": []
                },
                "brazilian_stemmer": {
def index_job_to_es_spark(spark, job, records_df, field_mapper_config):
    '''
    Method to index records dataframe into ES

    Args:
        spark (pyspark.sql.session.SparkSession): spark instance from static job methods
        job (core.models.Job): Job for records
        records_df (pyspark.sql.DataFrame): records as pyspark DataFrame
        field_mapper_config (dict): XML2kvp field mapper configurations

    Returns:
        None - indexes records to ES
    '''

    # init logging support
    spark.sparkContext.setLogLevel('INFO')
    log4jLogger = spark.sparkContext._jvm.org.apache.log4j
    logger = log4jLogger.LogManager.getLogger(__name__)

    # get index mapper
    index_mapper_handle = globals()['XML2kvpMapper']

    # create rdd from index mapper
    def es_mapper_pt_udf(pt):

        # init mapper once per partition
        mapper = index_mapper_handle(field_mapper_config=field_mapper_config)

        for row in pt:
            yield mapper.map_record(record_string=row.document,
                                    db_id=row._id.oid,
                                    combine_id=row.combine_id,
                                    record_id=row.record_id,
                                    publish_set_id=job.publish_set_id,
                                    fingerprint=row.fingerprint)

    logger.info('###ES 1 -- mapping records')
    mapped_records_rdd = records_df.rdd.mapPartitions(es_mapper_pt_udf)

    # attempt to write index mapping failures to DB
    # filter our failures
    logger.info('###ES 2 -- filtering failures')
    failures_rdd = mapped_records_rdd.filter(lambda row: row[0] == 'fail')

    # if failures, write
    if not failures_rdd.isEmpty():

        logger.info('###ES 3 -- writing indexing failures')
        failures_df = failures_rdd.map(
            lambda row: Row(db_id=row[1]['db_id'],
                            record_id=row[1]['record_id'],
                            mapping_error=row[1]['mapping_error'])).toDF()

        # add job_id as column
        failures_df = failures_df.withColumn('job_id', lit(job.id))

        # write mapping failures to DB
        failures_df.select(['db_id', 'record_id', 'job_id', 'mapping_error']) \
            .write.format("com.mongodb.spark.sql.DefaultSource") \
            .mode("append") \
            .option("uri", "mongodb://127.0.0.1") \
            .option("database", "combine") \
            .option("collection", "index_mapping_failure").save()

    # retrieve successes to index
    logger.info('###ES 4 -- filtering successes')
    to_index_rdd = mapped_records_rdd.filter(lambda row: row[0] == 'success')

    # create index in advance
    index_name = 'j%s' % job.id
    es_handle_temp = Elasticsearch(hosts=[settings.ES_HOST])
    if not es_handle_temp.indices.exists(index_name):

        # put combine es index templates
        template_body = {
            'template': '*',
            'settings': {
                'number_of_shards': 1,
                'number_of_replicas': 0,
                'refresh_interval': -1
            },
            'mappings': {
                'record': {
                    "dynamic_templates": [{
                        "strings": {
                            "match_mapping_type": "string",
                            "mapping": {
                                "type": "text",
                                "fields": {
                                    "keyword": {
                                        "type": "keyword"
                                    }
                                }
                            }
                        }
                    }],
                    'date_detection': False,
                    'properties': {
                        'combine_db_id': {
                            'type': 'integer'
                        }
                    }
                }
            }
        }
        es_handle_temp.indices.put_template('combine_template', body=json.dumps(template_body))

        # create index
        es_handle_temp.indices.create(index_name)

    # index to ES
    logger.info('###ES 5 -- writing to ES')
    to_index_rdd.saveAsNewAPIHadoopFile(
        path='-',
        outputFormatClass="org.elasticsearch.hadoop.mr.EsOutputFormat",
        keyClass="org.apache.hadoop.io.NullWritable",
        valueClass="org.elasticsearch.hadoop.mr.LinkedMapWritable",
        conf={
            "es.resource": "%s/record" % index_name,
            "es.nodes": "%s:9200" % settings.ES_HOST,
            "es.mapping.exclude": "temp_id,__class__",
            "es.mapping.id": "temp_id",
        })

    # refresh index
    es_handle_temp.indices.refresh(index_name)

    # return
    return to_index_rdd
import json
from datetime import datetime

import redis
from django.shortcuts import render
from django.views.generic.base import View
from django.http import HttpResponse

from search.models import JobboleEsModel, ZhihuAnswerEsModel, ZhihuQuestionEsModel, LagouEsModel
from elasticsearch import Elasticsearch

# elasticsearch
client = Elasticsearch(hosts=['localhost'])
# redis
redis_cli = redis.StrictRedis()


class IndexView(View):
    """Home page"""

    def get(self, request):
        # fetch the top-10 search keywords
        hot_keywords = redis_cli.zrevrangebyscore('search_keywords_set',
                                                  '+inf', '-inf',
                                                  start=0, num=5)
        return render(request, 'index.html', {'hot_keywords': hot_keywords})


class SuggestView(View):
    """Generate search suggestions and return them as an ajax response"""

    def get(self, request):
def es_builder(hosts=None, port=9200):
    if hosts is None:
        hosts = '127.0.0.1'
    logger = logging.getLogger('elasticsearch')
    logger.setLevel(logging.WARNING)
    return Elasticsearch(hosts=hosts, port=port, timeout=60, request_timeout=60)
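# Example calls for es_builder above; the host names in the second line are
# placeholders, assuming the default local single-node setup otherwise.
es = es_builder()                        # connects to 127.0.0.1:9200
# es = es_builder(hosts=['es1', 'es2'])  # or pass an explicit host list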
# companies with headquarters in Trento
# --------------------------------------------------
from elasticsearch import Elasticsearch, helpers
import certifi
import csv
import json
import globalvariable as gv

gv.init()
http = gv.http
index = gv.index_atk
path = gv.path_write_tnurls

es = Elasticsearch([http],
                   use_ssl=True,
                   verify_certs=True,
                   ca_certs=certifi.where())


def main():
    urls = url_scan()
    with open(path, "wb") as asd:
        writer = csv.writer(asd)
        for line in urls:
            writer.writerow(line)


def url_scan():
    url_list = []
def process_item(self, item, spider):
    es = Elasticsearch(self.es_hosts)
    es.index(index=self.index_name,
             doc_type=self.index_type,
             body=json.dumps(dict(item), ensure_ascii=False,
                             default=json_serial).encode("utf-8"))
    # es.index(index=self.index_name, doc_type=self.index_type,
    #          pipeline=self.ingest_pipeline,
    #          body=json.dumps(dict(item), ensure_ascii=False,
    #                          default=json_serial).encode("utf-8"))
    return item
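# Creating a new client for every scraped item is costly. A hedged sketch of the
# usual Scrapy pattern, building the client once in the pipeline's open_spider
# hook; the class name is hypothetical and the attributes (es_hosts, index_name,
# index_type) mirror the snippet above and are assumed to be set elsewhere.
class ElasticsearchPipelineSketch:

    def open_spider(self, spider):
        # called once by Scrapy when the spider starts
        self.es = Elasticsearch(self.es_hosts)

    def process_item(self, item, spider):
        self.es.index(index=self.index_name,
                      doc_type=self.index_type,
                      body=json.dumps(dict(item), ensure_ascii=False,
                                      default=json_serial).encode("utf-8"))
        return item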
#!/usr/bin/env python
# encoding: utf-8
import time
import datetime
import json
import csv
import random

from elasticsearch import Elasticsearch
from ruman.time_utils import *

# config
weibo_es = Elasticsearch('219.224.134.216:9201', timeout=1000)
INDEX_SENCE = 'social_sensing_task'
TYPE_SENCE = 'rumor-media'
TYPE_FLOAT_TEXT = "text"
ES_INDEX_CAL_LIST = 'rumor_calculated_list'
WEBOUSER_INDEX = 'weibo_user'


def get_user(uid):
    uid = int(uid)
    query_body = {"size": 10, "query": {"match": {"uid": uid}}}
    res = weibo_es.search(index=WEBOUSER_INDEX, doc_type='user',
                          body=query_body, request_timeout=100)
    hits = res['hits']['hits']
def __init__(self, address=MONGO_ADDRESS, port=MONGO_PORT):
    # note: the address/port arguments are not used; the ES host is hard-coded
    self.es = Elasticsearch(
        ['192.168.1.230:9200'],
        request_timeout=30000,
    )
def __init__(self, input_file, es_ip, es_port, index_name):
    self.input_file = input_file
    self.tree = self.__importXML()
    self.root = self.tree.getroot()
    self.es = Elasticsearch([{'host': es_ip, 'port': es_port}])
    self.index_name = index_name
@app.errorhandler(InvalidUsage)
def handle_invalid_usage(error):
    response = jsonify(error.to_dict())
    response.status_code = error.status_code
    return response


def load_model():
    """Load feature extractor model"""
    extractor = Extractor()
    return extractor


extractor = load_model()
es = Elasticsearch(hosts='localhost:9200')


@app.route("/hello", methods=['GET'])
def hello():
    return "Hello, world!"


@app.route("/extract_fea", methods=['GET', 'POST'])
def extract_fea():
    imgStr = request.values.get('img')
    if imgStr is None:
        raise InvalidUsage('parameter "img" is missing', status_code=410)
    try:
        img = read_img_blob(imgStr)
    except:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from elasticsearch import helpers
from elasticsearch import Elasticsearch
import datetime
import hashlib
import sys, os

# Python 2 only: force utf-8 as the default string encoding
reload(sys)
sys.setdefaultencoding('utf-8')

es = Elasticsearch([
    "http://192.168.241.47:9201",
    "http://192.168.241.46:9200",
    "192.168.241.50:9201"
], sniffer_timeout=False)


class Saver(object):
    def __init__(self, cache_size=1000):
        self.actions = []
        self._cache_size = cache_size

    def pass_data(self, dic, es=es, my_index="relation2", my_type="relation2"):
        # read data
        """
        load to es
        :param dic:
        :param es:
        :param my_index:
        :param my_type:
        :return:
def create_app(config_class=Config): app = Flask("Flask Application for Nemo") app.config.from_object(config_class) if app.config['ELASTICSEARCH_URL']: if app.config['ES_CLIENT_CERT'] or app.config['ES_CLIENT_KEY']: app.elasticsearch = Elasticsearch( app.config['ELASTICSEARCH_URL'], use_ssl=True, verify_certs=True, client_cert=app.config['ES_CLIENT_CERT'], client_key=app.config['ES_CLIENT_KEY']) else: app.elasticsearch = Elasticsearch(app.config['ELASTICSEARCH_URL']) else: app.elasticsearch = None app.IIIFserver = app.config['IIIF_SERVER']\ if app.config['IIIF_SERVER'] else None if app.config['IIIF_MAPPING']: app.IIIFmapping = app.config['IIIF_MAPPING'] with open('{}/Mapping.json'.format(app.config['IIIF_MAPPING']), "r") as f: app.picture_file = load(f) for key, value in app.picture_file.items(): if type(value) == dict: if 'manifest' in value.keys(): app.IIIFviewer = True continue else: app.IIIFviewer = False app.picture_file = "" break else: app.IIIFviewer = False app.picture_file = "" db.init_app(app) migrate.init_app(app, db) login.init_app(app) mail.init_app(app) bootstrap.init_app(app) babel.init_app(app) sess.init_app(app) app.redis = Redis.from_url(app.config['REDIS_URL']) if not app.debug and not app.testing: if not os.path.exists('logs'): os.mkdir('logs') file_handler = RotatingFileHandler('logs/formulae-nemo.log', maxBytes=10240, backupCount=10) file_handler.setFormatter( logging.Formatter( '%(asctime)s %(levelname)s: %(message)s [in %(pathname)s:%(lineno)d]' )) file_handler.setLevel(logging.INFO) app.logger.addHandler(file_handler) app.logger.setLevel(logging.INFO) app.logger.info('Formulae-Nemo startup') from .auth import bp as auth_bp app.register_blueprint(auth_bp, url_prefix="/auth") from .search import bp as search_bp app.register_blueprint(search_bp, url_prefix="/search") if app.IIIFviewer is False: app.logger.warning(_l('Der Viewer konnte nicht gestartet werden.')) else: from .viewer import bp as viewer_bp viewer_bp.static_folder = app.config['IIIF_MAPPING'] app.register_blueprint(viewer_bp, url_prefix="/viewer") return app
# WARNING: This file contains cron jobs for elasticsearch; please use pure python for any kind of operation here.
# Objects requiring flask app context may not work properly.
from elasticsearch import helpers, Elasticsearch

from app.views.redis_store import redis_store
from config import Config
from app.views.celery_ import celery
import psycopg2

es_store = Elasticsearch([Config.ELASTICSEARCH_HOST])
conn = psycopg2.connect(Config.SQLALCHEMY_DATABASE_URI)


@celery.task(name='rebuild.events.elasticsearch')
def cron_rebuild_events_elasticsearch():
    """
    Re-inserts all eligible events into elasticsearch
    Also clears event_index and event_delete redis sets
    :return:
    """
    conn = psycopg2.connect(Config.SQLALCHEMY_DATABASE_URI)
    cur = conn.cursor()
    cur.execute(
        "SELECT id, name, description, searchable_location_name, organizer_name, organizer_description FROM events WHERE state = 'published' and deleted_at is NULL ;"
    )
    events = cur.fetchall()
    event_data = ({
        '_type': 'event',
        '_index': 'events',
        '_id': event_[0],
        'name': event_[1],