Example #1
File: elastic.py Project: cvandeplas/plaso
    def __init__(self,
                 store,
                 filehandle=sys.stdout,
                 config=None,
                 filter_use=None):
        """Initializes the Elastic output module."""
        super(Elastic, self).__init__(store, filehandle, config, filter_use)
        self._counter = 0
        self._data = []
        # TODO: move this to an output module interface.
        self._formatters_manager = formatters_manager.EventFormatterManager

        elastic_host = getattr(config, 'elastic_server', '127.0.0.1')
        elastic_port = getattr(config, 'elastic_port', 9200)
        self._elastic_db = pyelasticsearch.ElasticSearch(
            u'http://{0:s}:{1:d}'.format(elastic_host, elastic_port))

        case_name = getattr(config, 'case_name', u'')
        document_type = getattr(config, 'document_type', u'')

        # case_name becomes the index name in Elastic.
        if case_name:
            self._index_name = case_name.lower()
        else:
            self._index_name = uuid.uuid4().hex

        # Name of the doc_type that holds the plaso events.
        if document_type:
            self._doc_type = document_type.lower()
        else:
            self._doc_type = u'event'

        # Build up a list of available hostnames in this storage file.
        self._hostnames = {}
        self._preprocesses = {}
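
The constructor above reads its settings off a generic config object with getattr(), so any attribute-bearing object will do. A minimal sketch of the index-name and doc_type selection logic, using a hypothetical argparse.Namespace in place of the real plaso config:

import argparse
import uuid

# Hypothetical stand-in for the plaso config object; only the attribute
# names matter, because the constructor reads them with getattr().
config = argparse.Namespace(
    elastic_server='127.0.0.1',
    elastic_port=9200,
    case_name='Case_42',       # lower-cased into the Elastic index name
    document_type='')          # empty -> falls back to u'event'

index_name = config.case_name.lower() if config.case_name else uuid.uuid4().hex
doc_type = config.document_type.lower() if config.document_type else 'event'
print(index_name, doc_type)    # -> case_42 event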
Example #2
    def setUp(self):
        super(ElasticsearchBoostBackendTestCase, self).setUp()

        # Wipe it clean.
        self.raw_es = pyelasticsearch.ElasticSearch(settings.HAYSTACK_CONNECTIONS['default']['URL'])
        clear_elasticsearch_index()

        # Stow.
        self.old_ui = connections['default'].get_unified_index()
        self.ui = UnifiedIndex()
        self.smmi = ElasticsearchBoostMockSearchIndex()
        self.ui.build(indexes=[self.smmi])
        connections['default']._index = self.ui
        self.sb = connections['default'].get_backend()

        self.sample_objs = []

        for i in xrange(1, 5):
            mock = AFourthMockModel()
            mock.id = i

            if i % 2:
                mock.author = 'daniel'
                mock.editor = 'david'
            else:
                mock.author = 'david'
                mock.editor = 'daniel'

            mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i)
            self.sample_objs.append(mock)
Example #3
    def __init__(self, output_mediator, **kwargs):
        """Initializes the output module object.

    Args:
      output_mediator: The output mediator object (instance of OutputMediator).
    """
        super(ElasticSearchOutputModule,
              self).__init__(output_mediator, **kwargs)
        self._counter = 0
        self._data = []

        elastic_host = self._output_mediator.GetConfigurationValue(
            u'elastic_server', default_value=u'127.0.0.1')
        elastic_port = self._output_mediator.GetConfigurationValue(
            u'elastic_port', default_value=9200)
        self._elastic_db = pyelasticsearch.ElasticSearch(
            u'http://{0:s}:{1:d}'.format(elastic_host, elastic_port))

        case_name = self._output_mediator.GetConfigurationValue(
            u'case_name', default_value=u'')
        document_type = self._output_mediator.GetConfigurationValue(
            u'document_type', default_value=u'')

        # case_name becomes the index name in Elastic.
        if case_name:
            self._index_name = case_name.lower()
        else:
            self._index_name = uuid.uuid4().hex

        # Name of the doc_type that holds the plaso events.
        if document_type:
            self._doc_type = document_type.lower()
        else:
            self._doc_type = u'event'
Example #4
    def search(self, query, size=1000, recent=False):
        """Search an elasticsearch server.

        `query` parameter is the complicated query structure that
        pyelasticsearch uses. More details in their documentation.

        `size` is the max number of results to return from the search
        engine. We default it to 1000 to ensure we don't lose things.
        For certain classes of queries (like faceted ones), this can actually
        be set very low, as it won't impact the facet counts.

        `recent` searches only the most recent index(es), assuming this is
        basically a real-time query where you only care about the last hour.
        Using recent dramatically reduces the load on the ES cluster.

        The result is returned wrapped in a ResultSet.

        """
        es = pyelasticsearch.ElasticSearch(self._url)
        args = {'size': size}
        if recent:
            # today's index
            datefmt = 'logstash-%Y.%m.%d'
            now = datetime.datetime.utcnow()
            lasthr = now - datetime.timedelta(hours=1)
            indexes = [now.strftime(datefmt)]
            if (lasthr.strftime(datefmt) != now.strftime(datefmt)):
                indexes.append(lasthr.strftime(datefmt))
            args['index'] = indexes

        results = es.search(query, **args)
        return ResultSet(results)
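
A hypothetical call to the search() method above (the `client` instance of the surrounding class is assumed); the query dict is plain Elasticsearch query DSL, which pyelasticsearch passes through unchanged:

query = {'query': {'term': {'loglevel': 'error'}}}   # field and value are assumptions

# Search the whole cluster, capped at the default 1000 hits.
results = client.search(query)

# Real-time style query: only today's (and possibly the previous hour's)
# logstash-YYYY.MM.DD index is consulted, which is much cheaper for the cluster.
recent_results = client.search(query, size=50, recent=True)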
Example #5
    def setUp(self):
        super(ElasticsearchSearchBackendTestCase, self).setUp()

        # Wipe it clean.
        self.raw_es = pyelasticsearch.ElasticSearch(settings.HAYSTACK_CONNECTIONS['default']['URL'])
        clear_elasticsearch_index()

        # Stow.
        self.old_ui = connections['default'].get_unified_index()
        self.ui = UnifiedIndex()
        self.smmi = ElasticsearchMockSearchIndex()
        self.smtmmi = ElasticsearchMaintainTypeMockSearchIndex()
        self.ui.build(indexes=[self.smmi])
        connections['default']._index = self.ui
        self.sb = connections['default'].get_backend()

        # Force the backend to rebuild the mapping each time.
        self.sb.existing_mapping = {}
        self.sb.setup()

        self.sample_objs = []

        for i in xrange(1, 4):
            mock = MockModel()
            mock.id = i
            mock.author = 'daniel%s' % i
            mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i)
            self.sample_objs.append(mock)
Example #6
 def __init__(self, settings=None):
     self.settings = settings or {
         "number_of_shards": 1,
         "number_of_replicas": 0
     }
     self.client = es.ElasticSearch(urls=["http://localhost:9200"])
     self.index_name = None
     self._buffer = []
Example #7
def clear_elasticsearch_index():
    # Wipe it clean.
    raw_es = pyelasticsearch.ElasticSearch(settings.HAYSTACK_CONNECTIONS['default']['URL'])
    try:
        raw_es.delete_index(settings.HAYSTACK_CONNECTIONS['default']['INDEX_NAME'])
        raw_es.refresh()
    except (requests.RequestException, pyelasticsearch.ElasticHttpError):
        pass
Example #8
def recreate_index():
    index = "acousticbrainz"
    es = pyelasticsearch.ElasticSearch(config.ELASTICSEARCH_ADDRESS)
    try:
        es.delete_index(index)
    except pyelasticsearch.ElasticHttpNotFoundError:
        pass
    es.create_index(index)
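
After recreate_index() the index is empty, so documents would typically be reloaded in bulk. A sketch using pyelasticsearch's bulk_index; the doc_type and document contents here are assumptions:

docs = [
    {'id': 1, 'artist': 'Example Artist', 'title': 'Example Track'},
    {'id': 2, 'artist': 'Another Artist', 'title': 'Another Track'},
]
es.bulk_index('acousticbrainz', 'document', docs, id_field='id')
es.refresh('acousticbrainz')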
Example #9
File: elastic.py Project: kr11/plaso
  def SetServerInformation(self, elastic_host, elastic_port):
    """Set the ElasticSearch connection.

    Args:
      elastic_host: the hostname or IP address of the ElasticSearch server.
      elastic_port: the port number that ElasticSearch is listening on.
    """
    self._elastic_db = pyelasticsearch.ElasticSearch(
        u'http://{0:s}:{1:d}'.format(elastic_host, elastic_port))
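
Usage is a single call; a hedged example, assuming `output_module` is an already-constructed instance of this output module:

output_module.SetServerInformation('127.0.0.1', 9200)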
Example #10
def _configure_es(host, timeout):
    import pyelasticsearch
    global elasticsearch
    try:
        elasticsearch = pyelasticsearch.ElasticSearch(host,
                                                      timeout=timeout,
                                                      max_retries=0)
    except Exception as e:
        elasticsearch = ErrorProxy(e)
Example #11
 def __init__(self, config, quit_check_callback=None):
     super(ElasticSearchCrashStorage,
           self).__init__(config, quit_check_callback)
     self.transaction = config.transaction_executor_class(
         config, self, quit_check_callback)
     if self.config.elasticsearch_urls:
         self.es = pyelasticsearch.ElasticSearch(
             self.config.elasticsearch_urls,
             timeout=self.config.elasticsearch_timeout)
     else:
         config.logger.warning('elasticsearch crash storage is disabled.')
Example #12
def _check_er_availability():
    global es_url
    global query_dir
    if not classifier:
        if not er:
            health = 'NotInstalled'
        elif not es_url or not query_dir:
            health = 'NotConfigured'
    else:
        url = classifier.config.es_url
        es = pyelasticsearch.ElasticSearch(url)
        health = {'Configured': {'elastic-search': es.health()['status']}}
    return health
Example #13
File: elastic.py Project: f-s-p/plaso
    def __init__(self,
                 store,
                 formatter_mediator,
                 filehandle=sys.stdout,
                 config=None,
                 filter_use=None):
        """Initializes the log output formatter object.

    Args:
      store: A storage file object (instance of StorageFile) that defines
             the storage.
      formatter_mediator: the formatter mediator object (instance of
                          FormatterMediator).
      filehandle: Optional file-like object that can be written to.
                  The default is sys.stdout.
      config: Optional configuration object, containing config information.
              The default is None.
      filter_use: Optional filter object (instance of FilterObject).
                  The default is None.
    """
        super(ElasticSearchOutput, self).__init__(store,
                                                  formatter_mediator,
                                                  filehandle=filehandle,
                                                  config=config,
                                                  filter_use=filter_use)
        self._counter = 0
        self._data = []

        elastic_host = getattr(config, 'elastic_server', '127.0.0.1')
        elastic_port = getattr(config, 'elastic_port', 9200)
        self._elastic_db = pyelasticsearch.ElasticSearch(
            u'http://{0:s}:{1:d}'.format(elastic_host, elastic_port))

        case_name = getattr(config, 'case_name', u'')
        document_type = getattr(config, 'document_type', u'')

        # case_name becomes the index name in Elastic.
        if case_name:
            self._index_name = case_name.lower()
        else:
            self._index_name = uuid.uuid4().hex

        # Name of the doc_type that holds the plaso events.
        if document_type:
            self._doc_type = document_type.lower()
        else:
            self._doc_type = u'event'

        # Build up a list of available hostnames in this storage file.
        self._hostnames = {}
        self._preprocesses = {}
Example #14
    def get_s3_bucket_dir_to_index(self):
        if len(self.path_name_s3_billing) == 1:
            prefix = '/' + '/'.join(self.path_name_s3_billing) + '/'
        else:
            prefix = '/'.join(self.path_name_s3_billing) + '/'

        key_names = self.s3.list_objects(Bucket=self.bucketname,
                                         Prefix=prefix,
                                         Delimiter='/')
        s3_dir_names = []

        if 'CommonPrefixes' not in key_names:
            return 1

        for keys in key_names['CommonPrefixes']:
            s3_dir_names.append(keys['Prefix'].split('/')[-2])

        s3_dir_names.sort()
        es = pyes.ElasticSearch('http://*****:*****@timestamp'])

        index_time.sort(reverse=True)

        dir_start = 0
        dir_end = None

        if index_time:
            current_dir = dtd.today().strftime('%Y%m01') + '-' + (dtd.today() + \
                                    relativedelta(months=1)).strftime('%Y%m01')

            last_ind_dir = index_time[0].split('T')[0].replace('-', '')
            last_ind_dir = dtdt.strptime(last_ind_dir, '%Y%m%d').strftime(
                '%Y%m01') + '-' + (dtdt.strptime(last_ind_dir, '%Y%m%d') +
                                   relativedelta(months=1)).strftime('%Y%m01')
            dir_start = s3_dir_names.index(last_ind_dir)
            dir_end = s3_dir_names.index(current_dir) + 1

        s3_dir_to_index = s3_dir_names[dir_start:dir_end]
        print('Months to be indexed: {}'.format(', '.join(s3_dir_to_index)))
        # returning only the dirnames which are to be indexed
        return s3_dir_to_index
Example #15
    def __init__(self, connection_alias, **connection_options):
        super(ElasticsearchSearchBackend, self).__init__(connection_alias, **connection_options)

        if 'URL' not in connection_options:
            raise ImproperlyConfigured("You must specify a 'URL' in your settings for connection '%s'." % connection_alias)

        if 'INDEX_NAME' not in connection_options:
            raise ImproperlyConfigured("You must specify an 'INDEX_NAME' in your settings for connection '%s'." % connection_alias)

        self.conn = pyelasticsearch.ElasticSearch(connection_options['URL'], timeout=self.timeout)
        self.index_name = connection_options['INDEX_NAME']
        self.log = logging.getLogger('haystack')
        self.setup_complete = False
        self.existing_mapping = {}
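
The connection_options read above come from Django's HAYSTACK_CONNECTIONS setting; a typical entry looks roughly like this (URL and index name are illustrative):

HAYSTACK_CONNECTIONS = {
    'default': {
        'ENGINE': 'haystack.backends.elasticsearch_backend.ElasticsearchSearchEngine',
        'URL': 'http://127.0.0.1:9200/',
        'INDEX_NAME': 'haystack',
    },
}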
Example #16
    def get(self, **kwargs):
        '''Return the result of a custom query. '''
        params = external_common.parse_arguments(self.filters, kwargs)

        if not params.query:
            raise MissingArgumentError('query')

        try:
            query = json.loads(params.query)
        except ValueError:
            raise BadArgumentError(
                'query',
                msg="Invalid JSON value for parameter 'query'"
            )

        es = pyelasticsearch.ElasticSearch(
            urls=self.config.elasticsearch_urls,
            timeout=self.config.elasticsearch_timeout_extended,
        )

        # Set indices.
        indices = []
        if not params.indices:
            # By default, use the last two indices.
            today = utc_now()
            last_week = today - datetime.timedelta(days=7)

            indices = self.generate_list_of_indexes(last_week, today)
        elif len(params.indices) == 1 and params.indices[0] == 'ALL':
            # If we want all indices, just do nothing.
            pass
        else:
            indices = params.indices

        search_args = {}
        if indices:
            search_args['index'] = indices
            search_args['doc_type'] = self.config.elasticsearch_doctype

        try:
            results = es.search(
                query,
                **search_args
            )
        except ElasticHttpNotFoundError as e:
            missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0]
            raise ResourceNotFound(
                "elasticsearch index '%s' does not exist" % missing_index
            )
Example #17
    def __init__(self, config, quit_check_callback=None):
        super(ElasticSearchCrashStorage,
              self).__init__(config, quit_check_callback)
        self.transaction = config.transaction_executor_class(
            config, self, quit_check_callback)
        if self.config.elasticsearch_urls:
            self.es = pyelasticsearch.ElasticSearch(
                self.config.elasticsearch_urls, timeout=self.config.timeout)

            settings_json = open(
                self.config.elasticsearch_index_settings).read()
            self.index_settings = json.loads(settings_json %
                                             self.config.elasticsearch_doctype)
        else:
            config.logger.warning('elasticsearch crash storage is disabled.')
Example #18
    def search(self, query, size=1000, recent=False, days=0):
        """Search an elasticsearch server.

        `query` parameter is the complicated query structure that
        pyelasticsearch uses. More details in their documentation.

        `size` is the max number of results to return from the search
        engine. We default it to 1000 to ensure we don't lose things.
        For certain classes of queries (like faceted ones), this can actually
        be set very low, as it won't impact the facet counts.

        `recent` searches only the most recent index(es), assuming this is
        basically a real-time query where you only care about the last hour.
        Using recent dramatically reduces the load on the ES cluster.

        `days` searches only the indices for the last `days` days.

        The result is returned wrapped in a ResultSet.

        """
        es = pyelasticsearch.ElasticSearch(self._url)
        args = {'size': size}
        if recent or days:
            # today's index
            datefmt = self._indexfmt
            now = datetime.datetime.utcnow()
            indexes = []
            latest_index = now.strftime(datefmt)
            if self._is_valid_index(es, latest_index):
                indexes.append(latest_index)
            if recent:
                lasthr = now - datetime.timedelta(hours=1)
                lasthr_index = lasthr.strftime(datefmt)
                if lasthr_index != latest_index:
                    if self._is_valid_index(es, lasthr_index):
                        indexes.append(lasthr.strftime(datefmt))
            for day in range(1, days):
                lastday = now - datetime.timedelta(days=day)
                index_name = lastday.strftime(datefmt)
                if self._is_valid_index(es, index_name):
                    indexes.append(index_name)
            args['index'] = indexes

        results = es.search(query, **args)
        return ResultSet(results)
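
As in Example #4, a hypothetical call (the `client` instance is assumed); days=7 limits the search to the indices for today plus the previous six days, skipping any index that does not exist:

results = client.search({'query': {'match_all': {}}}, days=7)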
Example #19
    def main(self):
        storage = self.config.elasticsearch_storage_class(self.config)

        crash_file = open(self.config.processed_crash_file)
        processed_crash = json.load(crash_file)
        es_index = storage.get_index_for_crash(processed_crash)
        es_doctype = self.config.elasticsearch_doctype
        crash_id = processed_crash['uuid']

        storage.save_processed(processed_crash)

        # Verify the crash has been inserted
        es = pyelasticsearch.ElasticSearch(self.config.elasticsearch_urls)

        crash = es.get(es_index, es_doctype, crash_id)
        assert crash['exists']

        print 'Success - %s/%s/%s' % (es_index, es_doctype, crash_id)
Example #20
    def delete_old_indices(self):
        now = utc_now()
        policy_delay = datetime.timedelta(weeks=self.config.retention_policy)
        time_limit = (now - policy_delay).replace(tzinfo=None)

        es = pyelasticsearch.ElasticSearch(
            self.config.elasticsearch.elasticsearch_urls,
            timeout=self.config.elasticsearch.elasticsearch_timeout
        )

        state = es.cluster_state()
        indices = state['metadata']['indices'].keys()

        aliases = es.aliases()

        for index in indices:
            # Some indices look like 'socorro%Y%W_%Y%M%d', but they are
            # aliased to the expected format of 'socorro%Y%W'. In such cases,
            # replace the index with the alias.
            if index in aliases:
                index_aliases = aliases[index]['aliases'].keys()
                if index_aliases:
                    index = index_aliases[0]

            if not re.match(
                self.config.elasticsearch.elasticsearch_index_regex,
                index
            ):
                # This index doesn't look like a crash index, let's skip it.
                continue

            # This won't take the week part of our indices into account...
            index_date = datetime.datetime.strptime(
                index,
                self.config.elasticsearch.elasticsearch_index
            )
            # So we need to get that differently, and then add it to the date.
            index_date += datetime.timedelta(weeks=int(index[-2:]))

            if index_date < time_limit:
                es.delete_index(index)  # Bad index! Go away!
Example #21
File: related.py Project: ehsan/airmozilla
def get_connection():
    return pyelasticsearch.ElasticSearch(settings.ELASTICSEARCH_URL)
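
A quick way to exercise this helper, assuming an Elasticsearch node is reachable at settings.ELASTICSEARCH_URL, is to ask the cluster for its health:

conn = get_connection()
print(conn.health()['status'])   # e.g. 'green' or 'yellow'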
Example #22
import re
import json

from flask import Flask, render_template
import pymongo
import pyelasticsearch

import config

# Setup Flask
app = Flask(__name__)

# Setup Mongo
conn = pymongo.Connection()  # defaults to localhost
db = conn.agile_data
emails = db['emails']
addresses_per_email = db['addresses_per_email']
emails_per_address = db['emails_per_address']
sent_distributions = db['sent_distributions']

# Setup ElasticSearch
elastic = pyelasticsearch.ElasticSearch(config.ELASTIC_URL)


# Controller: Fetch an email and display it
@app.route("/email/<message_id>")
def email(message_id):
    email = emails.find_one({'message_id': message_id})
    addresses = addresses_per_email.find_one({'message_id': message_id})
    sent_dist_records = sent_distributions.find_one(
        {'address': email['from']['address']})
    return render_template('partials/email.html',
                           email=email,
                           addresses=addresses['addresses'],
                           chart_json=json.dumps(
                               sent_dist_records['sent_distribution']),
                           sent_distribution=sent_dist_records)
Example #23
import pyelasticsearch as pyes

keyword = '張世瑛'

es = pyes.ElasticSearch('http://localhost:9200')

# query = {'query':{'match':{'message':'資訊部'}}}
# query = {'query':{'match_phrase':{'message':'資訊部'}}}
# query = {'query':{'match_phrase':{'comments.data.message':'鄧美玉'}}}

# query = {
#     'filtered':{
#         'query':{
#             'multi_match':{
#                 'type':'phrase',
#                 'query':keyword,
#                 'fields':['comments.data.message','message']
#             }
#         },
#         'filter':{
#             'and':[
#                 {'match_phrase':{'message':'會計學'}},
#                 {'match_phrase':{'message':'鄧美玉'}}
#             ]
#         }
#     }
# }

query = {
    'query': {
        'multi_match': {
Example #24
import pyelasticsearch
es = pyelasticsearch.ElasticSearch("http://localhost:9200")

def autocomplete_genre(g):
    query = {"placeholder": {"text": g.lower(), "completion": {"field": "toptags.tag.name_complete"}}}
    completes = es._search_or_count('_suggest', query, index="acousticbrainz")
    if "placeholder" in completes:
        options = completes["placeholder"][0]["options"]
        options = sorted(options, key=lambda x: x["score"])
    else:
        options = []
    return [o["text"] for o in options]

def autocomplete_artist(g):
    query = {"placeholder": {"text": g.lower(), "completion": {"field": "metadata.tags.artist_complete"}}}
    completes = es._search_or_count('_suggest', query, index="acousticbrainz")
    if "placeholder" in completes:
        options = completes["placeholder"][0]["options"]
        options = sorted(options, key=lambda x: x["score"])
    else:
        options = []
    return [o["text"] for o in options]


def autocomplete_track(t):
    query = {"placeholder": {"text": t.lower(), "completion": {"field": "metadata.tags.title_complete"}}}
    completes = es._search_or_count('_suggest', query, index="acousticbrainz")
    if "placeholder" in completes:
        options = completes["placeholder"][0]["options"]
        options = sorted(options, key=lambda x: x["score"])
    else:
Example #25
logger = logging.getLogger('z.elasticsearch')

# Enable these to get full debugging information.
# logging.getLogger('pyelasticsearch').setLevel(logging.DEBUG)
# logging.getLogger('requests').setLevel(logging.DEBUG)

# The subset of settings.ES_INDEXES we are concerned with.
ALIAS = settings.ES_INDEXES['webapp']

if hasattr(settings, 'ES_URLS'):
    ES_URL = settings.ES_URLS[0]
else:
    ES_URL = 'http://127.0.0.1:9200'

ES = pyelasticsearch.ElasticSearch(ES_URL)

job = 'lib.es.management.commands.reindex_mkt.run_indexing'
time_limits = settings.CELERY_TIME_LIMITS[job]


@task
def delete_index(old_index):
    """Removes the index."""
    sys.stdout.write('Removing index %r' % old_index)
    ES.delete_index(old_index)


@task
def create_index(new_index, alias, settings):
    """Creates a mapping for the new index.
Example #26
File: base.py Project: niwinz/needlestack
 def __init__(self, urls, settings, *args, **kwargs):
     self._default_settings = settings
     self._es = pyelasticsearch.ElasticSearch(urls, *args, **kwargs)
Example #27
sys.path.append("../")

import os
#os.environ.setdefault("DJANGO_SETTINGS_MODULE", "beCOMPANY.settings")
#import beCOMPANY
#import beCOMPANY.settings as settings
from elasticmodel import *
from dateutil.parser import parse
#import psycopg2
import threading
from dateutil.relativedelta import relativedelta
import time

import pyelasticsearch
from pyelasticsearch import bulk_chunks
es = pyelasticsearch.ElasticSearch(port=9200)
eastern = timezone('US/Eastern')
'''
try:
    dbstr="dbname=" + settings.DATABASES['default']['NAME'] + \
          " user="******" password="******" host=" + settings.DATABASES['default']['HOST'] + \
          " port=" + settings.DATABASES['default']['PORT']
          
    c=psycopg2.connect(dbstr)
except:
    pass #print	 "I am unable to connect to the database."
'''
Instrument.init()
Feed.init()
Example #28
    Search module for the scrAPI website.
"""
import logging
import pyelasticsearch
import search_settings

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


# These are the doc_types that exist in the search database
TYPES = ['article', 'citation']

try:
    elastic = pyelasticsearch.ElasticSearch(
        search_settings.ELASTIC_URI,
        timeout=search_settings.ELASTIC_TIMEOUT
    )
    logging.getLogger('pyelasticsearch').setLevel(logging.WARN)
    logging.getLogger('requests').setLevel(logging.WARN)
    elastic.health()
except pyelasticsearch.exceptions.ConnectionError as e:
    logger.error(e)
    logger.warn("The SEARCH_ENGINE setting is set to 'elastic', but there "
                "was a problem starting the elasticsearch interface. Is "
                "elasticsearch running?")
    elastic = None


def requires_search(func):
    def wrapped(*args, **kwargs):
        if elastic is not None:
Example #29
from the data folder
'''

dataFolder = "../data/"


#===========================================================================
#   THIS IS ONE WAY TO INDEX. There seems to be a limit of 1k fields per index, which is generous,
#   but surprisingly one CSV file had more than that.
#   Also, is this limit a Lucene/ES limit or a client limit? Does the "official client" improve this?
#===========================================================================
#csv2es --index-name potatoes --doc-type potato --import-file potatoes.cs


'''
pip install csv2es
pip install pyelasticsearch
'''

es_client = pyelasticsearch.ElasticSearch('http://localhost:9200/')
myDocuments = csv2es.documents_from_file(es_client, "./test.csv", ",", quiet=False)
csv2es.perform_bulk_index(host='http://localhost:9200/',
                          index_name="baseindex",
                          doc_type="basedoctype",
                          doc_fetch=myDocuments,
                          docs_per_chunk=5000,
                          bytes_per_chunk=100000,
                          parallel=1)


print("END of elastic search test script")
Example #30
def main(global_config, **settings):
    """ This function returns a Pyramid WSGI application.
    """
    auth_tkt_policy = AuthTktAuthenticationPolicy(
        settings['authtkt.secret'],
        hashalg='sha512',
        callback=groupfinder,
        max_age=2592000,
        secure=asbool(settings.get('authtkt.secure', 'false')))
    auth_token_policy = AuthTokenAuthenticationPolicy(
        callback=groupfinder
    )
    authorization_policy = ACLAuthorizationPolicy()
    authentication_policy = AuthenticationStackPolicy()
    authentication_policy.add_policy('auth_tkt', auth_tkt_policy)
    authentication_policy.add_policy('auth_token', auth_token_policy)
    # set crypto key
    encryption.ENCRYPTION_SECRET = settings.get('encryption_secret')
    # import this later so encryption key can be monkeypatched
    from appenlight.models import DBSession, register_datastores
    # update config with cometd info
    settings['cometd_servers'] = {'server': settings['cometd.server'],
                                  'secret': settings['cometd.secret']}

    # Create the Pyramid Configurator.
    settings['_mail_url'] = settings['mailing.app_url']
    config = CythonCompatConfigurator(
        settings=settings,
        authentication_policy=authentication_policy,
        authorization_policy=authorization_policy,
        root_factory='appenlight.security.RootFactory',
        default_permission='view')
    # custom registry variables

    # resource type information
    config.registry.resource_types = ['resource', 'application']
    # plugin information
    config.registry.appenlight_plugins = {}

    config.set_default_csrf_options(require_csrf=True, header='X-XSRF-TOKEN')
    config.add_view_deriver('appenlight.predicates.csrf_view',
                            name='csrf_view')

    # later, when config is available
    dogpile_config = {'url': settings['redis.url'],
                      "redis_expiration_time": 86400,
                      "redis_distributed_lock": True}
    cache_regions.regions = cache_regions.CacheRegions(dogpile_config)
    config.registry.cache_regions = cache_regions.regions
    engine = engine_from_config(settings, 'sqlalchemy.',
                                json_serializer=json.dumps)
    DBSession.configure(bind=engine)

    # json renderer that serializes datetime
    config.add_renderer('json', json_renderer)
    config.set_request_property('appenlight.lib.request.es_conn', 'es_conn')
    config.set_request_property('appenlight.lib.request.get_user', 'user',
                                reify=True)
    config.set_request_property('appenlight.lib.request.get_csrf_token',
                                'csrf_token', reify=True)
    config.set_request_property('appenlight.lib.request.safe_json_body',
                                'safe_json_body', reify=True)
    config.set_request_property('appenlight.lib.request.unsafe_json_body',
                                'unsafe_json_body', reify=True)
    config.add_request_method('appenlight.lib.request.add_flash_to_headers',
                              'add_flash_to_headers')
    config.add_request_method('appenlight.lib.request.get_authomatic',
                              'authomatic', reify=True)

    config.include('pyramid_redis_sessions')
    config.include('pyramid_tm')
    config.include('pyramid_jinja2')
    config.include('appenlight_client.ext.pyramid_tween')
    config.include('ziggurat_foundations.ext.pyramid.sign_in')
    es_server_list = aslist(settings['elasticsearch.nodes'])
    redis_url = settings['redis.url']
    log.warning('Elasticsearch server list: {}'.format(es_server_list))
    log.warning('Redis server: {}'.format(redis_url))
    config.registry.es_conn = pyelasticsearch.ElasticSearch(es_server_list)
    config.registry.redis_conn = redis.StrictRedis.from_url(redis_url)

    config.registry.redis_lockmgr = Redlock([settings['redis.redlock.url'], ],
                                            retry_count=0, retry_delay=0)
    # mailer
    config.registry.mailer = Mailer.from_settings(settings)

    # Configure sessions
    session_factory = session_factory_from_settings(settings)
    config.set_session_factory(session_factory)

    # Configure renderers and event subscribers
    config.add_jinja2_extension('jinja2.ext.loopcontrols')
    config.add_jinja2_search_path('appenlight:templates')
    # event subscribers
    config.add_subscriber("appenlight.subscribers.application_created",
                          "pyramid.events.ApplicationCreated")
    config.add_subscriber("appenlight.subscribers.add_renderer_globals",
                          "pyramid.events.BeforeRender")
    config.add_subscriber('appenlight.subscribers.new_request',
                          'pyramid.events.NewRequest')
    config.add_view_predicate('context_type_class',
                              'appenlight.predicates.contextTypeClass')

    register_datastores(es_conn=config.registry.es_conn,
                        redis_conn=config.registry.redis_conn,
                        redis_lockmgr=config.registry.redis_lockmgr)

    # base stuff and scan

    # need to ensure webassets exists otherwise config.override_asset()
    # throws exception
    if not os.path.exists(settings['webassets.dir']):
        os.mkdir(settings['webassets.dir'])
    config.add_static_view(path='appenlight:webassets',
                           name='static', cache_max_age=3600)
    config.override_asset(to_override='appenlight:webassets/',
                          override_with=settings['webassets.dir'])

    config.include('appenlight.views')
    config.include('appenlight.views.admin')
    config.scan(ignore=['appenlight.migrations', 'appenlight.scripts',
                        'appenlight.tests'])

    config.add_directive('register_appenlight_plugin',
                         register_appenlight_plugin)

    for entry_point in iter_entry_points(group='appenlight.plugins'):
        plugin = entry_point.load()
        plugin.includeme(config)

    # include other appenlight plugins explicitly if needed
    includes = aslist(settings.get('appenlight.includes', []))
    for inc in includes:
        config.include(inc)

    # run this after everything registers in configurator

    def pre_commit():
        jinja_env = config.get_jinja2_environment()
        jinja_env.filters['tojson'] = json.dumps
        jinja_env.filters['toJSONUnsafe'] = jinja2_filters.toJSONUnsafe

    config.action(None, pre_commit, order=PHASE3_CONFIG + 999)

    def wrap_config_celery():
        configure_celery(config.registry)

    config.action(None, wrap_config_celery, order=PHASE3_CONFIG + 999)

    app = config.make_wsgi_app()
    return app