コード例 #1
0
ファイル: tagger.py プロジェクト: shreya1313/Sentence-Tagger
def sent(jd_id):
    """Render or store the sentence tags of one job description.

    On ``GET`` a ``TagEntryForm`` is filled with one ``SentenceTag`` entry per
    existing tagging; when the tag service reports none, one placeholder
    entry tagged ``'Tags'`` is created per sentence of the job description.
    For any other method the submitted form fields are folded into a nested
    mapping and the values found under ``tag_list`` are handed to the tag
    service.

    :param jd_id: identifier forwarded to the job-description and tag services
    :return: the rendered ``sent.html`` template
    """
    jd_details = srv.get_jd_details(jd_id)
    jd_html = jd_details.get('jd_html')

    if request.method != 'GET':
        # Cut is configured with '-' as separator, so a '-' inside a
        # submitted field name addresses a nested key.
        nested_form = Cut({}, sep='-')
        for field_name, field_value in request.form.to_dict().items():
            nested_form.setdefault(field_name, field_value)

        submitted = dict(nested_form)
        tags = list(submitted['tag_list'].values())

        tag_srv.perform_sentence_tagging(jd_id, tags)

        return render_template(
            'sent.html', tagged=True, jd_html=Markup(jd_html))

    form = TagEntryForm()
    taggings = tag_srv.get_tagged_sentences_for_jd(jd_id).get('taggings')

    if not taggings:
        # Nothing tagged yet: offer every sentence with the placeholder tag.
        taggings = [
            {'sentence': sentence, 'tag': 'Tags'}
            for sentence in jd_details.get('jd_sentences')
        ]

    for tagging in taggings:
        entry = SentenceTag()
        entry.sentence = tagging.get('sentence')
        entry.tag = tagging.get('tag')
        form.tag_list.append_entry(entry)

    return render_template('sent.html', form=form, jd_html=Markup(jd_html))
コード例 #2
0
ファイル: utils.py プロジェクト: open-contracting/spoonbill
def add_paths_to_schema(schema):
    """
    Annotate a deep copy of the schema with ``$path`` and ``$title`` entries.

    For every path produced by ``generate_paths`` that ends in ``"title"``,
    the object owning that title receives a ``$path`` key holding the full
    path. Afterwards, for every item produced by ``title_path``, the object
    addressed by the last element of that item receives a ``$title`` key
    holding the whole item.

    :param schema: Schema object; it is deep-copied and left untouched
    :return: ``Cut`` proxy wrapping the annotated copy of the schema
    """
    proxy = Cut(copy.deepcopy(schema))

    annotated_tables = {}
    for table in proxy["properties"].keys():
        table_proxy = Cut({table: copy.deepcopy(proxy["properties"][table])})
        # Record the full path next to every title of this table.
        for path in generate_paths({table: proxy["properties"][table]}):
            if path[-1] != "title":
                continue
            owner_path = ".".join(path[:-1])
            table_proxy[owner_path]["$path"] = path
        annotated_tables[table] = table_proxy[table]
    proxy["properties"] = annotated_tables

    # Materialise the title paths first, then attach them to the schema.
    collected = list(title_path(proxy["properties"]))
    for titles in collected:
        target = "properties." + ".".join(titles[-1][:-1])
        proxy[target]["$title"] = titles

    return proxy
コード例 #3
0
class Config(object):
    """Configuration of the Slow Start Rewatch.

    Items are read from a default config file, placeholders in selected
    items are expanded, and changed items are written through to a
    ``ConfigStorage``.
    """

    def __init__(
        self,
        filename: str = DEFAULT_CONFIG_FILENAME,
    ) -> None:
        """Load the default config, expand placeholders, create the storage."""
        log.debug("default_config_load", filename=filename)

        default_config_path = os.path.join(ROOT_DIR, filename)
        self.config = Cut(anyconfig.load([default_config_path]))

        self._substitute_placeholders()

        storage_path = self.config["local_config_file"]
        self.storage = ConfigStorage(storage_path)

    def __getitem__(self, key):
        """Look up a config item by key."""
        return self.config[key]

    def __setitem__(self, key, item_value) -> None:
        """Store a config item and save the config; no-op if unchanged."""
        unchanged = key in self.config and self.config[key] == item_value
        if not unchanged:
            log.debug("config_save", key=key, item_value=item_value)
            self.config[key] = item_value
            self.storage.save(self.config)

    def __contains__(self, key):
        """Tell whether the config holds an item under the key."""
        return key in self.config

    def load(self) -> None:
        """Merge the items held by the local storage into the config."""
        stored_items = self.storage.load()
        self.config.update(stored_items)

    def _substitute_placeholders(self) -> None:
        """Expand ``string.Template`` placeholders in selected config items."""
        mapping = dict(
            home_dir=HOME_DIR,
            ps=os.path.sep,
            version=version(),
        )
        keys = [
            "data_dir",
            "schedule_file",
            "local_config_file",
            "reddit.user_agent",
        ]

        log.debug("config_substitute", mapping=mapping, keys=keys)

        for key in keys:
            # safe_substitute leaves unknown placeholders in place.
            template = Template(self.config[key])
            self.config[key] = template.safe_substitute(mapping)
コード例 #4
0
    def __init__(self, config: dict = None, datadir: str = None):
        """Connect to OpenCTI and Elasticsearch and create the managers.

        :param config: connector configuration; a fresh empty dict is used
            when omitted
        :param datadir: passed through to the import manager
        :raises ValueError: if the helper reports no live stream ID
            (``None`` or the placeholder ``"ChangeMe"``)
        """
        # The configuration is wrapped by Cut and mutated below
        # ("opencti.platform_url"), so a literal ``{}`` default would be
        # shared, and polluted, across instances.
        if config is None:
            config = {}

        self.shutdown_event: threading.Event = threading.Event()

        self.helper = OpenCTIConnectorHelper(config)
        logger.info("Connected to OpenCTI")

        if (self.helper.connect_live_stream_id is None
                or self.helper.connect_live_stream_id == "ChangeMe"):
            raise ValueError("Missing Live Stream ID")

        self.config = Cut(config)

        # Start streaming from 1 second ago
        self.helper.set_state({
            "connectorLastEventId":
            str(int(round(time.time() * 1000)) - 1000)
        })

        # Get the external URL as configured in OpenCTI Settings page
        query = """
        query SettingsQuery {
            settings {
                id
                platform_url
            }
        }
        """
        _settings = self.helper.api.query(query)["data"]["settings"]
        self.config["opencti.platform_url"] = _settings.get(
            "platform_url", None)

        self._connect_elasticsearch()

        if self.config["connector.mode"] == "ecs":
            self.import_manager = IntelManager(self.helper, self.elasticsearch,
                                               self.config, datadir)

            self.sightings_manager = SignalsManager(
                config=self.config,
                shutdown_event=self.shutdown_event,
                opencti_client=self.helper,
                elasticsearch_client=self.elasticsearch,
            )
        elif self.config["connector.mode"] == "stix":
            self.import_manager = StixManager(self.helper, self.elasticsearch,
                                              self.config, datadir)

            # No sightings are collected in "stix" mode.
            self.sightings_manager = None
        else:
            # NOTE(review): only logged; no managers are created in this case.
            logger.error(
                f"connector.mode: {self.config['connector.mode']} is unsupported. Should be 'ecs' or 'stix'"
            )
コード例 #5
0
    def __init__(
        self,
        filename: str = DEFAULT_CONFIG_FILENAME,
    ) -> None:
        """Load the default config, expand placeholders, create the storage.

        :param filename: config file name, joined onto ``ROOT_DIR``
        """
        log.debug("default_config_load", filename=filename)

        default_config_path = os.path.join(ROOT_DIR, filename)
        self.config = Cut(anyconfig.load([default_config_path]))

        self._substitute_placeholders()

        # The storage location itself comes from the (expanded) config.
        storage_path = self.config["local_config_file"]
        self.storage = ConfigStorage(storage_path)
コード例 #6
0
def initialize_sentry() -> None:
    """Initialize Sentry monitoring.

    The DSN is read from the ``project.sentry_dsn`` setting; when it is
    empty a warning is logged and nothing else happens. (The original notes
    also name the ``SENTRY_ENDPOINT`` environment variable, presumably
    resolved by the settings layer; confirm.)

    The release reported to Sentry is the ``tool.poetry.version`` value of
    the ``pyproject.toml`` found in the current working directory. A
    ``BadDsn`` error during initialization is logged, not raised.
    """
    dsn = settings["project.sentry_dsn"]
    if not dsn:
        logger.warning("Sentry not initialized.")
        return

    pyproject_path = Path().cwd() / "pyproject.toml"
    with open(str(pyproject_path), "r") as pyproject_file:
        pyproject = Cut(toml.load(pyproject_file))
        release = pyproject["tool.poetry.version"]

    try:
        sentry_sdk.init(
            dsn=dsn,
            integrations=[AwsLambdaIntegration()],
            environment=settings.stela_options.current_environment,
            release=release,
        )
    except BadDsn as error:
        logger.error(f"Error when initializing Sentry: {error}")
    else:
        logger.info("Sentry initialized.")
コード例 #7
0
    def test_get_order_price_min_days(
            client,
            all_headers,
            _items_data_min_days):
        """The price of the posted cart equals the sum of the minimum costs."""
        create_resp = client.post(
            url_for("api.cart_items"),
            json={"items": _items_data_min_days},
            headers=all_headers["customer_headers"])
        assert create_resp.status_code == 201

        # The cart id sits at data -> cart -> id in the response body.
        cart_id = Cut(create_resp.json)["data.cart.id"]

        price_resp = client.get(
            url_for("api.order_price", cart_id=cart_id),
            headers=all_headers["customer_headers"])
        assert price_resp.status_code == 200

        expected_cost = sum(
            [MIN_FICTION_COST, MIN_NOVEL_COST, MIN_REGULAR_COST])
        assert price_resp.json["cost_usd"] == expected_cost
コード例 #8
0
 async def get_users(self,
                     search="",
                     country="",
                     state="",
                     state_code="",
                     city="",
                     school_id="",
                     tool_id="",
                     sort="recommended",
                     ordinal="",
                     **kwargs):
     """Search for users and return them together with paging details.

     The arguments are forwarded to ``self.search`` with ``content="users"``;
     ``state_code``, ``school_id`` and ``tool_id`` are sent as ``stateCode``,
     ``schools`` and ``tools``. Extra keyword arguments are passed through.

     Returns a 2-tuple: the value found at ``search.content.users`` and a
     dict with the ``hasMore``, ``itemsPerPage``, ``nextOrdinal`` and
     ``nextPage`` values of the ``search`` section.
     """
     payload = await self.search(content="users",
                                 search=search,
                                 country=country,
                                 state=state,
                                 stateCode=state_code,
                                 city=city,
                                 schools=school_id,
                                 tools=tool_id,
                                 sort=sort,
                                 ordinal=ordinal,
                                 **kwargs)
     res = Cut(payload)

     users = res['search.content.users']
     paging = {
         "hasMore": res['search.hasMore'],
         "itemsPerPage": res['search.itemsPerPage'],
         "nextOrdinal": res['search.nextOrdinal'],
         "nextPage": res["search.nextPage"],
     }
     return users, paging
コード例 #9
0
    def test_get_order_price(client, _items_data, all_headers):
        """Check the 404, 401 and 200 answers of the order price endpoint."""
        create_resp = client.post(url_for("api.cart_items"),
                                  json={"items": _items_data},
                                  headers=all_headers["customer_headers"])
        cart_id = Cut(create_resp.json)["data.cart.id"]

        # A cart id that does not exist.
        unknown_cart_url = url_for("api.order_price", cart_id=-10)
        not_found = client.get(unknown_cart_url,
                               headers=all_headers["customer_headers"])
        assert not_found.status_code == 404

        order_price_url = url_for("api.order_price", cart_id=cart_id)

        # The real cart, requested with the no-auth headers.
        unauthorized = client.get(order_price_url,
                                  headers=all_headers["no_auth_headers"])
        assert unauthorized.status_code == 401

        # The real cart, requested as the customer.
        priced = client.get(order_price_url,
                            headers=all_headers["customer_headers"])
        assert priced.status_code == 200

        fiction_cost = TWO_DAYS * MIN_FICTION_COST
        novel_cost = FIVE_DAYS * 1.5
        regular_cost = FOUR_DAYS * 1.5
        expected_cost = fiction_cost + novel_cost + regular_cost
        assert priced.json["cost_usd"] == expected_cost
コード例 #10
0
ファイル: utils.py プロジェクト: open-contracting/spoonbill
 def _get_header(self, id, paths):
     final_title = []
     for path in paths:
         _object = Cut(self.schema)["properties." + ".".join(path[:-1])]
         if hasattr(_object,
                    "__reference__") and "title" in _object.__reference__:
             title = _object.__reference__["title"]
         else:
             title = Cut(self.schema)["properties." + ".".join(path)]
         if isinstance(title, dict):
             continue
         final_title.append(title)
     if id.startswith("/documents"):
         final_title = final_title[3:]
     if "Organization reference" in final_title:
         final_title.remove("Organization reference")
     return ": ".join(final_title)
コード例 #11
0
def test_schema_header_paths(schema):
    """Each title path gets a ``$title`` list whose last item is the path."""
    paths = generate_paths(schema["properties"])
    proxy = Cut(add_paths_to_schema(schema)["properties"])

    for path in paths:
        if path[-1] != "title":
            continue
        owner = proxy[".".join(path[:-1])]
        assert "$title" in owner
        assert owner["$title"][-1] == path
コード例 #12
0
def get_status_unshorturls(status_id: int):
    """Run ``unshorturls`` on a stored status and write it back.

    Returns ``True`` after the update, or ``None`` when no status data is
    found for ``status_id``.
    """
    status_data = _get_status_ES(status_id)
    if status_data is None:
        return None

    status = Cut(status_data)
    unshorturls(status)
    _update_status_ES(status_id, status.data)
    return True
コード例 #13
0
def config_server_args(config_path):
    """Return config-server settings for ``config_path``, local files ignored.

    The loaded config is included twice: as returned by ``load_config``
    (``POCS``) and wrapped in a ``Cut`` (``POCS_cut``).
    """
    pocs_config = load_config(config_files=config_path, ignore_local=True)
    return dict(
        config_file=config_path,
        auto_save=False,
        ignore_local=True,
        POCS=pocs_config,
        POCS_cut=Cut(pocs_config),
    )
コード例 #14
0
ファイル: core.py プロジェクト: jdayllon/leiserbik
    def _get_base_status(id: int):
        """Return a new status dict holding source, id and update timestamp.

        The id is stored both as given (``id``) and as a string (``id_str``).
        """
        base = Cut()
        base['@data_source'] = 'https://mobile.twitter.com'
        base['id'] = id
        base['id_str'] = str(id)

        updated_at = arrow.utcnow().format(LONG_DATETIME_PATTERN) + "Z"
        base['@updated_at'] = updated_at

        return base.data
コード例 #15
0
def _update_status_key_ES(status_id: int, key:str , data: dict):
    """Set ``key`` of a stored status to ``data`` and write the status back.

    ``key`` is resolved through a ``Cut`` proxy. Returns ``True`` after the
    update, or ``None`` when no status data is found for ``status_id``.
    """
    status_data = _get_status_ES(status_id)
    if status_data is None:
        return None

    status = Cut(status_data)
    status[key] = data
    _update_status_ES(status_id, status.data)
    return True
コード例 #16
0
def get_status_sheldon_score(status_id: int):
    """Compute the sheldon score of a stored status and write it back.

    The score is computed from the status' ``full_text`` and ``lang`` and
    stored under ``sheldon_score``. Returns ``True`` after the update, or
    ``None`` when no status data is found for ``status_id``.
    """
    status_data = _get_status_ES(status_id)
    if status_data is None:
        return None

    status = Cut(status_data)
    status['sheldon_score'] = get_sheldon_score(status['full_text'],
                                                status['lang'])
    _update_status_ES(status_id, status.data)
    return True
コード例 #17
0
    async def get_user_projects(self, username, offset=12, full_res=False):
        """Fetch the projects of a user.

        Requests ``/{username}/projects`` with the given ``offset``. With
        ``full_res`` the decoded JSON body is returned as is; otherwise a
        2-tuple of the ``projects`` and ``hasMore`` values found under
        ``profile.activeSection.work``.
        """
        res = await self.get(f"/{username}/projects",
                             params={"offset": offset})
        body = await res.json()

        if full_res:
            return body

        work = Cut(body)
        projects = work['profile.activeSection.work.projects']
        has_more = work['profile.activeSection.work.hasMore']
        return (projects, has_more)
コード例 #18
0
def config_server(host='localhost',
                  port=6563,
                  config_file=None,
                  ignore_local=False,
                  auto_save=False,
                  auto_start=True,
                  debug=False):
    """Create, and by default start, a process running the config server.

    The Flask ``app.config`` is filled with the given options and the loaded
    config (plain under ``POCS`` and wrapped in a ``Cut`` under
    ``POCS_cut``) before the process object is created.

    Args:
        host (str, optional): Name of host, default 'localhost'.
        port (int, optional): Port for server, default 6563.
        config_file (str|None, optional): The config file to load; passed
            unchanged to ``load_config`` (the original docs say ``None``
            falls back to `$PANDIR/conf_files/pocs.yaml`; confirm there).
        ignore_local (bool, optional): If local config files should be
            ignored, default False.
        auto_save (bool, optional): If setting new values should auto-save
            to local file, default False.
        auto_start (bool, optional): If the server process should be started
            automatically, default True.
        debug (bool, optional): Flask server debug mode, default False.

    Returns:
        `multiprocessing.Process`: The process that runs the config server
        (not started when ``auto_start`` is False).
    """
    app.config['auto_save'] = auto_save
    app.config['config_file'] = config_file
    app.config['ignore_local'] = ignore_local

    pocs_config = load_config(config_files=config_file,
                              ignore_local=ignore_local)
    app.config['POCS'] = pocs_config
    app.config['POCS_cut'] = Cut(pocs_config)

    def start_server(**kwargs):
        # An OSError from app.run is reported as a warning, not raised.
        try:
            app.run(**kwargs)
        except OSError:
            warn(
                'Problem starting config server, is another config server already running?'
            )
            return None

    server_kwargs = {'host': host, 'port': port, 'debug': debug}
    server_process = Process(target=start_server,
                             kwargs=server_kwargs,
                             name='panoptes-config-server')

    if auto_start:
        try:
            server_process.start()
        except KeyboardInterrupt:
            server_process.terminate()

    return server_process
コード例 #19
0
def _load_job_es(job):
    """Index one JSON file into Elasticsearch.

    ``job`` is indexable: element 0 is the path of the JSON file, element 1
    the client whose ``index`` method is called, element 2 the index name.
    The document gets a fresh ``updated_at`` value and is indexed under the
    id found at ``user.screen_name``.
    """
    job_file, job_es, job_idx = job[0], job[1], job[2]

    with open(job_file) as open_file:
        document = Cut(json.load(open_file))
        document['updated_at'] = str(arrow.utcnow())
        document_id = document['user.screen_name']

        job_es.index(index=job_idx,
                     doc_type='res',
                     id=document_id,
                     body=json.dumps(document.data))
コード例 #20
0
ファイル: import_manager.py プロジェクト: nor3th/connectors
    def __init__(
        self,
        helper: OpenCTIConnectorHelper,
        elasticsearch_client: Elasticsearch,
        config: dict[str, str],
        datadir: str,
    ):
        """Store the collaborators, read index settings, set up the index.

        :param helper: OpenCTI connector helper, kept as ``self.helper``
        :param elasticsearch_client: client, kept as ``self.es_client``
        :param config: configuration, wrapped in a ``Cut``
        :param datadir: kept as ``self.datadir``
        """
        self.config: Cut = Cut(config)
        self.helper: OpenCTIConnectorHelper = helper
        self.es_client: Elasticsearch = elasticsearch_client
        self.datadir: str = datadir

        # Both settings are None when absent from the configuration.
        self.idx: str = self.config.get("output.elasticsearch.index")
        self.idx_pattern: str = self.config.get("setup.template.pattern")

        self.pattern: re.Pattern = re.compile(RE_DATEMATH)

        self._setup_elasticsearch_index()
コード例 #21
0
def reset_config():
    """Reset the configuration.

    An endpoint that reads its parameters from the query string on GET and
    from the json request object on POST. The parameters must contain the
    key ``reset`` with a truthy value.

    The method reloads the configuration from the config files the server
    was started with and stores it in the app config (plain under ``POCS``
    and wrapped in a ``Cut`` under ``POCS_cut``).

    .. note::

        The original notes state that local (and saved) files are skipped on
        reload; here the stored ``load_local`` setting is passed on to
        ``load_config`` unchanged.

    Returns:
        str: A json string object containing the keys ``success`` and ``msg``
        that indicate success or failure. A missing or falsy ``reset``
        parameter yields the failure answer.
    """
    params = dict()
    if request.method == 'GET':
        params = request.args
    elif request.method == 'POST':
        # get_json() can return None or a non-object JSON value; treat both
        # as "no parameters" so the failure answer below is returned instead
        # of raising on the lookup.
        payload = request.get_json()
        params = payload if isinstance(payload, dict) else dict()

    logger.warning('Resetting config server')

    # .get(): a request without the key is invalid, not a server error.
    if not params.get('reset'):
        return jsonify({
            'success': False,
            'msg': "Invalid. Need json request: {'reset': True}"
        })

    # Reload the config
    config = load_config(config_files=app.config['config_file'],
                         load_local=app.config['load_local'])
    # Add an entry to control running of the server.
    config['config_server'] = dict(running=True)
    app.config['POCS'] = config
    app.config['POCS_cut'] = Cut(config)

    return jsonify({
        'success': True,
        'msg': 'Configuration reset'
    })
コード例 #22
0
def test_create_user(client, db):
    """Registration rejects an incomplete payload and stores a complete one."""
    register_user_url = url_for('auth.register_user')

    # Username only: expected to be rejected.
    data = {"username": "******"}
    rejected = client.post(register_user_url, json=data)
    assert rejected.status_code == 400

    # Complete payload: expected to create the user.
    data.update(password="******", email="*****@*****.**")
    resp = client.post(register_user_url, json=data)
    assert resp.status_code == 201

    created_id = Cut(resp.get_json())["user.id"]
    user = db.session.query(User).filter_by(id=created_id).first()
    assert user.username == "created"
    assert user.email == "*****@*****.**"
コード例 #23
0
def reset_config():
    """Reload the configuration when a json request asks for a reset.

    For a json request the config is reloaded from the stored
    ``config_file`` / ``ignore_local`` settings if the body is an object
    with a truthy ``reset`` entry; the request body is echoed back either
    way. A non-json request gets a failure answer with ``success`` and
    ``msg`` keys.
    """
    if not request.is_json:
        return jsonify({
            'success': False,
            'msg': "Invalid. Need json request: {'reset': True}"
        })

    get_root_logger().warning('Resetting config server')
    req_data = request.get_json()

    # A body without a 'reset' key (or a non-object body) means "no reset";
    # a plain req_data['reset'] lookup would raise instead.
    if isinstance(req_data, dict) and req_data.get('reset'):
        # Reload the config
        app.config['POCS'] = load_config(
            config_files=app.config['config_file'],
            ignore_local=app.config['ignore_local'])
        app.config['POCS_cut'] = Cut(app.config['POCS'])

    return jsonify(req_data)
コード例 #24
0
    def __init__(
        self,
        config: dict,
        shutdown_event: Event,
        opencti_client: OpenCTIConnectorHelper,
        elasticsearch_client: Elasticsearch,
    ) -> None:
        """Read the signal settings and prepare the signals search body.

        :param config: configuration, wrapped in a ``Cut``
        :param shutdown_event: kept as ``self.shutdown_event``
        :param opencti_client: kept as ``self.helper``
        :param elasticsearch_client: kept as ``self.es_client``; it must
            answer ``ping()`` (checked with an ``assert``)
        """
        super(SignalsManager, self).__init__()
        self.config: Cut = Cut(config)
        self.shutdown_event: Event = shutdown_event
        self.es_client: Elasticsearch = elasticsearch_client

        self.helper: OpenCTIConnectorHelper = opencti_client
        self.author_id = None

        # Interval in seconds; 300 (5 minutes) unless the configured
        # duration can be parsed.
        interval_spec = self.config.get("elastic.signals.query_interval",
                                        "5m")
        parsed_interval = parse_duration(interval_spec)
        if parsed_interval is None:
            self.interval = 300
        else:
            self.interval = parsed_interval.total_seconds()

        self.search_idx = self.config.get("elastic.signals.signal_index",
                                          ".siem-signals-*")
        base_query = json.loads(
            self.config.get("elastic.signals.query", DEFAULT_QUERY))
        lookback = self.config.get("elastic.signals.lookback_interval",
                                   DEFAULT_LOOKBACK)

        assert self.es_client.ping()

        # Restrict the configured query to the lookback window.
        timestamp_range = {
            "@timestamp": {
                "gte": f"now-{lookback}/m",
                "lt": "now/m"
            }
        }
        search = Search(using=self.es_client, index=self.search_idx)
        search = search.from_dict(base_query)
        search = search.filter("range", **timestamp_range)
        self.signals_search: dict = search.to_dict()

        logger.info("Signals manager thread initialized")
コード例 #25
0
def translate(value,lang_pair='spa|eng'):
    """ Call Apertium APY to obtain a translation.
        @value: input string
        @lang_pair: identifier of the translation pair

        Returns the ``responseData.translatedText`` value of the answer, or
        None when the HTTP status is not 200.
    """
    form_fields = [
        ('langpair', lang_pair),
        ('q', value),
        ('markUnknown', 'no'),
    ]
    # TODO: read the Apertium URL from the YML config
    res = requests.post('http://localhost:2737/translate', data=form_fields)

    if res.status_code != 200:
        return None

    answer = json.loads(res.content.decode('utf-8'))
    return Cut(answer)['responseData.translatedText']
コード例 #26
0
def _get_botometer_api_response(screen_name: str, status_id: int):
    """Attach Botometer data for ``screen_name`` to a stored status.

    When ``__check_botometer`` returns a dict, that dict is written to the
    status under ``user.botometer``. When it returns a value equal to
    ``False``, the Botometer API is called, the answer is saved as a JSON
    backup file, indexed in Elasticsearch under the screen name, and then
    written to the status. Any other result does nothing.
    """
    # Another (duplicated) task may already have loaded fresh Botometer data.
    known = __check_botometer(screen_name)

    if type(known) == dict:
        _update_status_key_ES(status_id, 'user.botometer', known)
        return

    # Kept as an equality test on purpose: what __check_botometer returns is
    # not visible from here.
    if known == False:  # noqa: E712
        settings = Settings()
        es = Elasticsearch(settings.ELASTICSEARCH_URL)

        twitter_app_auth = dict(
            consumer_key=settings.CONSUMER_KEY,
            consumer_secret=settings.CONSUMER_SECRET,
            access_token=settings.ACCESS_TOKEN_KEY,
            access_token_secret=settings.ACCESS_TOKEN_SECRET,
        )
        # TODO: find a better place for this setup, or document it
        logger.debug("Init Botometer API 🤖")
        bom = botometer.Botometer(
            botometer_api_url='https://botometer-pro.p.mashape.com',
            wait_on_ratelimit=True,
            mashape_key=settings.BOTOMETER_KEY,
            **twitter_app_auth)

        logger.debug("Calling Botometer API 🤖: %s" % screen_name)
        fresh = Cut(bom.check_account(screen_name))
        fresh['updated_at'] = str(arrow.utcnow())
        fresh_json = json.dumps(fresh.data, indent=4)

        backup_name = "%s.json" % fresh['user.id_str']
        save_json(fresh_json, settings.BOTOMETER_JSON_BACKUP + backup_name)

        logger.info("Indexing Botometer info for screen name: %s" % screen_name)
        es.index(index=settings.ELASTICSEARCH_BOT_INDEX,
                 doc_type='res',
                 id=screen_name,
                 body=fresh_json)

        logger.info("Indexing Status updated info for screen name: %s" % screen_name)
        _update_status_key_ES(status_id, 'user.botometer', fresh.data)
コード例 #27
0
ファイル: import_manager.py プロジェクト: nor3th/connectors
    def __init__(
        self,
        helper: OpenCTIConnectorHelper,
        elasticsearch_client: Elasticsearch,
        config: dict[str, str],
        datadir: str,
    ):
        """Store the collaborators, read index settings, set up the index.

        ``self.write_idx`` is the ``setup.ilm.rollover_alias`` setting
        (default ``"opencti"``) when ``setup.ilm.enabled`` is exactly
        ``True``; otherwise it equals the ``output.elasticsearch.index``
        setting.

        :param helper: OpenCTI connector helper, kept as ``self.helper``
        :param elasticsearch_client: client, kept as ``self.es_client``
        :param config: configuration, wrapped in a ``Cut``
        :param datadir: kept as ``self.datadir``
        """
        self.config: Cut = Cut(config)
        self.helper: OpenCTIConnectorHelper = helper
        self.es_client: Elasticsearch = elasticsearch_client
        self.datadir: str = datadir

        self.idx: str = self.config.get("output.elasticsearch.index")
        self.idx_pattern: str = self.config.get("setup.template.pattern")

        ilm_enabled = self.config.get("setup.ilm.enabled", False) is True
        if ilm_enabled:
            self.write_idx: str = self.config.get("setup.ilm.rollover_alias",
                                                  "opencti")
        else:
            self.write_idx: str = self.config.get(
                "output.elasticsearch.index")

        self.pattern = re.compile(RE_DATEMATH)

        self._setup_elasticsearch_index()
コード例 #28
0
class ElasticConnector:
    """Forwards OpenCTI live-stream events to the Elasticsearch managers."""

    def __init__(self, config: dict = None, datadir: str = None):
        """Connect to OpenCTI and Elasticsearch and create the managers.

        :param config: connector configuration; a fresh empty dict is used
            when omitted
        :param datadir: passed through to the import manager
        :raises ValueError: if the helper reports no live stream ID
            (``None`` or the placeholder ``"ChangeMe"``)
        """
        # The configuration is wrapped by Cut and mutated below
        # ("opencti.platform_url"), so a literal ``{}`` default would be
        # shared, and polluted, across instances.
        if config is None:
            config = {}

        self.shutdown_event: threading.Event = threading.Event()

        self.helper = OpenCTIConnectorHelper(config)
        logger.info("Connected to OpenCTI")

        if (self.helper.connect_live_stream_id is None
                or self.helper.connect_live_stream_id == "ChangeMe"):
            raise ValueError("Missing Live Stream ID")

        self.config = Cut(config)

        # Start streaming from 1 second ago
        self.helper.set_state({
            "connectorLastEventId":
            str(int(round(time.time() * 1000)) - 1000)
        })

        # Get the external URL as configured in OpenCTI Settings page
        query = """
        query SettingsQuery {
            settings {
                id
                platform_url
            }
        }
        """
        _settings = self.helper.api.query(query)["data"]["settings"]
        self.config["opencti.platform_url"] = _settings.get(
            "platform_url", None)

        self._connect_elasticsearch()

        if self.config["connector.mode"] == "ecs":
            self.import_manager = IntelManager(self.helper, self.elasticsearch,
                                               self.config, datadir)

            self.sightings_manager = SignalsManager(
                config=self.config,
                shutdown_event=self.shutdown_event,
                opencti_client=self.helper,
                elasticsearch_client=self.elasticsearch,
            )
        elif self.config["connector.mode"] == "stix":
            self.import_manager = StixManager(self.helper, self.elasticsearch,
                                              self.config, datadir)

            # No sightings are collected in "stix" mode.
            self.sightings_manager = None
        else:
            # NOTE(review): only logged; no managers are created in this case.
            logger.error(
                f"connector.mode: {self.config['connector.mode']} is unsupported. Should be 'ecs' or 'stix'"
            )

    def _connect_elasticsearch(self) -> None:
        """Create ``self.elasticsearch`` from the configuration.

        HTTP auth comes from ``cloud.auth`` ("user:password") or, failing
        that, from the ``output.elasticsearch`` username/password pair. An
        API key comes from ``output.elasticsearch.api_key`` ("id:key").
        Configuring both kinds of credentials exits the process with
        status 1. The client targets ``cloud.id`` when set, otherwise
        ``output.elasticsearch.hosts`` (default ``["localhost:9200"]``).
        """
        _apikey = None
        _httpauth = None

        if self.config.get("cloud.auth", None):
            _httpauth = tuple(self.config.get("cloud.auth").split(":"))
        elif self.config.get("output.elasticsearch.username",
                             None) and self.config.get(
                                 "output.elasticsearch.password", None):
            _httpauth = (
                self.config.get("output.elasticsearch.username"),
                self.config.get("output.elasticsearch.password"),
            )

        if self.config.get("output.elasticsearch.api_key", None):
            _apikey = tuple(
                self.config.get("output.elasticsearch.api_key").split(":"))

        if _httpauth is not None and _apikey is not None:
            logger.critical(
                "Either username/password auth or api_key auth should be used for Elasticsearch, not both."
            )
            sys.exit(1)

        if self.config.get("cloud.id", None):
            logger.debug(
                f"Connecting to Elasticsearch using cloud.id {self.config.get('cloud.id')}"
            )
            self.elasticsearch = Elasticsearch(
                cloud_id=self.config.get("cloud.id"),
                verify_certs=self.config.get("output.elasticsearch.ssl_verify",
                                             True),
                http_auth=_httpauth,
                api_key=_apikey,
            )
        else:
            logger.debug(
                f"Connecting to Elasticsearch using hosts: {self.config.get('output.elasticsearch.hosts', ['localhost:9200'])}"
            )
            self.elasticsearch = Elasticsearch(
                hosts=self.config.get("output.elasticsearch.hosts",
                                      ["localhost:9200"]),
                verify_certs=self.config.get("output.elasticsearch.ssl_verify",
                                             True),
                http_auth=_httpauth,
                api_key=_apikey,
            )

        logger.info("Connected to Elasticsearch")

        return

    def handle_create(self, timestamp: datetime, data: dict) -> None:
        """Import the event payload of a "create" stream message."""
        logger.debug("[CREATE] Processing indicator {" + data["id"] + "}")

        self.import_manager.import_cti_event(timestamp, data)
        return

    def handle_update(self, timestamp, data):
        """Import the event payload of an "update" stream message."""
        logger.debug("[UPDATE] Processing indicator {" + data["id"] + "}")

        self.import_manager.import_cti_event(timestamp, data, is_update=True)
        return

    def handle_delete(self, timestamp, data):
        """Delete the event named by a "delete" stream message."""
        logger.debug("[DELETE] Processing indicator {" + data["id"] + "}")

        self.import_manager.delete_cti_event(data)
        return

    def _process_message(self, msg) -> None:
        """Decode a stream message and dispatch it by its event type.

        The part of ``msg.id`` before the first ``-`` is read as a
        millisecond timestamp; ``msg.data`` is JSON whose ``data`` entry is
        the payload. Messages whose event is not "create", "update" or
        "delete" are ignored.

        :raises ValueError: if the id or the JSON body cannot be parsed
        """
        logger.debug("_process_message")

        try:
            event_id = msg.id
            epoch_seconds = round(int(event_id.split("-")[0]) / 1000)
            timestamp = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc)
            data = json.loads(msg.data)["data"]
        except ValueError as err:
            logger.error(f"Unable to process the message: {msg}")
            # Format instead of concatenating: ``msg`` is not a str, and
            # ``"..." + msg`` would raise TypeError and hide this error.
            raise ValueError(f"Cannot process the message: {msg}") from err

        logger.debug(f"[PROCESS] Message (id: {event_id}, date: {timestamp})")

        if msg.event == "create":
            return self.handle_create(timestamp, data)

        if msg.event == "update":
            return self.handle_update(timestamp, data)

        if msg.event == "delete":
            return self.handle_delete(timestamp, data)

    def start(self) -> None:
        """Listen to the stream until shutdown is requested, then clean up.

        In "ecs" mode the sightings manager is started first and joined
        (3 second timeout) on the way out.
        """
        self.shutdown_event.clear()

        if self.config["connector.mode"] == "ecs":
            self.sightings_manager.start()

        # Look out, this doesn't block
        self.helper.listen_stream(self._process_message)

        try:
            # Just wait here until someone presses ctrl+c
            self.shutdown_event.wait()
        except KeyboardInterrupt:
            self.shutdown_event.set()

        logger.info("Shutting down")

        if self.config["connector.mode"] == "ecs":
            self.sightings_manager.join(timeout=3)
            if self.sightings_manager.is_alive():
                # Logger.warn is a deprecated alias of Logger.warning.
                logger.warning("Sightings manager didn't shutdown by request")

        self.elasticsearch.close()
        logger.info(
            "Main thread complete. Waiting on background threads to complete. Press CTRL+C to quit."
        )
コード例 #29
0
def _prepare_json_status(status, unshort_urls=True, sheldon=True):
    """Post-process a Twitter status for indexing and return it as JSON.

    The status is wrapped in a ``Cut`` proxy and then, in order:

    * every ``created_at`` value that does not already match
      ``FIXED_TWITTER_DATE_TIME`` is rewritten as ``YYYY-MM-DDTHH:mm:ssZ``;
    * ``source_href`` / ``source_desc`` are derived from ``source``;
    * coordinate pairs are replaced by geohashes and a geohash of the place
      bounding-box centroid is added;
    * URL unshortening, Sheldon scoring and Botometer lookups are invoked
      according to the flags and settings.

    Date and source fixing are best effort: failures are logged as warnings
    and processing continues.

    Arguments:
        status {dict} -- Twitter status, either a dict or an object that
            provides ``AsDict()``

    Keyword Arguments:
        unshort_urls {bool} -- When true, ``get_status_unshorturls`` is called
            with the status id; disabling it is much faster because no URL
            resolution happens (default: {True})
        sheldon {bool} -- When true, ``get_status_sheldon_score`` is called
            with the status id (default: {True})

    Returns:
        [str] -- JSON output of the post-processed status (4-space indent)
    """

    settings = Settings()

    # isinstance also accepts dict subclasses, which do not offer AsDict().
    if isinstance(status, dict):
        proxy = Cut(status)
    else:
        proxy = Cut(status.AsDict())
    id_str = proxy['id_str']

    # Fix twitter dates to a more 'standard' date format
    list_all_keys_w_dots = dotter(proxy.data, '', [])
    try:
        for created_at_key in list_all_keys_w_dots:
            if 'created_at' not in created_at_key:
                continue
            # A match means the value was already fixed by an earlier pass.
            if FIXED_TWITTER_DATE_TIME.match(proxy[created_at_key]) is not None:
                continue
            cur_dt = arrow.get(proxy[created_at_key], TWITTER_DATETIME_PATTERN)
            # Arrow tokens are case sensitive: MM is month and SS is
            # sub-second, so minutes and seconds must be spelled mm and ss.
            proxy[created_at_key] = cur_dt.format("YYYY-MM-DDTHH:mm:ss") + "Z"
    except Exception:
        # Best effort: keep whatever was converted so far and carry on.
        logger.warning("Error parsing dates on %s" % id_str)

    # Fixed source
    try:
        source = proxy["source"]
        proxy["source_href"] = _get_source_href(source)
        proxy["source_desc"] = _get_source_href_desc(source)
    except Exception:
        logger.warning("Error fixing source getting href")

    # Fixed geolocations
    # If len(proxy['xxxx.coordinates']) == 1 it has no lat/lon pair and is
    # probably already a geohash.

    # Twitter api says:
    # The longitude and latitude of the Tweet’s location, as a collection in the form [longitude, latitude]. Example: "coordinates":[-97.51087576,35.46500176]
    # Geohash lib input function : def encode(latitude, longitude, precision=12):

    if 'coordinates.coordinates' in proxy and len(proxy['coordinates.coordinates']) > 1:
        # [longitude, latitude] order, so swap when calling encode(lat, lon).
        proxy['coordinates.coordinates'] = geohash2.encode(proxy['coordinates.coordinates'][1], proxy['coordinates.coordinates'][0])
    if 'geo.coordinates' in proxy and len(proxy['geo.coordinates']) > 1:
        proxy['geo.coordinates'] = geohash2.encode(proxy['geo.coordinates'][0], proxy['geo.coordinates'][1])

    KEY_PLACE_BB = 'place.bounding_box.coordinates'

    if KEY_PLACE_BB in proxy:

        centroid_bb_data = []

        for i, ring in enumerate(proxy[KEY_PLACE_BB]):
            for j, point in enumerate(ring):
                # Points are read as [longitude, latitude].
                c_lon, c_lat = point[0], point[1]
                centroid_bb_data.append((float(c_lat), float(c_lon)))
                # Replace the pair in place with its geohash (same list
                # length, so iterating the ring stays valid).
                proxy[KEY_PLACE_BB + "[%d][%d]" % (i, j)] = geohash2.encode(c_lat, c_lon)

        # Create a new point with the centroid
        if centroid_bb_data:
            centroid_bb = centeroidnp(np.array(centroid_bb_data))
            proxy['place.bounding_box_centroid'] = geohash2.encode(centroid_bb[0], centroid_bb[1])

    # Check and fix shortened urls in expanded field:
    if unshort_urls:
        get_status_unshorturls(proxy['id'])

    # Get sheldon score
    if sheldon:
        #! TODO Transform ISO
        get_status_sheldon_score(proxy['id'])

    # Get Botometer
    #! TODO Add ES search for botometer info
    if settings.BOTOMETER_KEY is not None:
        proxy['user.botometer'] = get_botometer(proxy['user.screen_name'], proxy['id'])

    return json.dumps(proxy.data, indent=4)
コード例 #30
0
ファイル: server.py プロジェクト: panoptes/panoptes-utils
def config_server(
    config_file,
    host=None,
    port=None,
    load_local=True,
    save_local=False,
    auto_start=True,
    access_logs=None,
    error_logs='logger',
):
    """Create the config server process and, by default, start it.

    The config is loaded with ``load_config``, tagged with a
    ``config_server`` entry, stored on the flask ``app.config`` (both as the
    plain mapping and wrapped in ``Cut``) and then served by a gevent
    ``WSGIServer`` running inside a daemon ``Process``.

    Args:
        config_file (str or None): Path of the config file to load. When falsy
            the ``PANOPTES_CONFIG_FILE`` env var is used; a missing variable
            raises ``KeyError``.
        host (str, optional): Host to bind. Falls back to the
            ``PANOPTES_CONFIG_HOST`` env var, then to ``'localhost'``.
        port (str or int, optional): Port to bind. Falls back to the
            ``PANOPTES_CONFIG_PORT`` env var, then to ``6563``.
        load_local (bool, optional): Passed to ``load_config`` and stored on
            the app config, default True.
        save_local (bool, optional): Stored on the app config, default False.
        auto_start (bool, optional): Start the process before returning,
            default True.
        access_logs ('default' or 'logger' or `File`-like or None, optional):
            Value given to ``WSGIServer(log=...)``. The string ``'logger'`` is
            swapped for the module logger; anything else is passed through.
            Default ``None``.
        error_logs ('default' or 'logger' or `File`-like or None, optional):
            Same handling as ``access_logs`` but for ``error_log``; defaults
            to ``'logger'``.

    Returns:
        Process: The process object wrapping the server, started only when
        ``auto_start`` is true.
    """
    if not config_file:
        config_file = os.environ['PANOPTES_CONFIG_FILE']
    logger.info(
        f'Starting panoptes-config-server with  config_file={config_file!r}')

    config = load_config(config_files=config_file, load_local=load_local)
    logger.success(f'Config server Loaded {len(config)} top-level items')

    # Marker entry used to control running of the server.
    config['config_server'] = {'running': True}

    logger.success(f'{config!r}')
    cut_config = Cut(config)

    # Publish everything the flask views need on the app config.
    flask_settings = (
        ('config_file', config_file),
        ('save_local', save_local),
        ('load_local', load_local),
        ('POCS', config),
        ('POCS_cut', cut_config),
    )
    for setting_name, setting_value in flask_settings:
        app.config[setting_name] = setting_value
    logger.info('Config items saved to flask config-server')

    # The literal string 'logger' selects the module logger for either log.
    if access_logs == 'logger':
        access_logs = logger
    if error_logs == 'logger':
        error_logs = logger

    def start_server(host='localhost', port=6563):
        # Process target: serve until the process ends, logging any failure.
        try:
            logger.info(f'Starting panoptes config server with {host}:{port}')
            address = (host, int(port))
            http_server = WSGIServer(address,
                                     app,
                                     log=access_logs,
                                     error_log=error_logs)
            http_server.serve_forever()
        except OSError:
            logger.warning(
                'Problem starting config server, is another config server already running?'
            )
            return None
        except Exception as e:
            logger.warning(f'Problem starting config server: {e!r}')
            return None

    if not host:
        host = os.getenv('PANOPTES_CONFIG_HOST', 'localhost')
    if not port:
        port = os.getenv('PANOPTES_CONFIG_PORT', 6563)

    cmd_kwargs = {'host': host, 'port': port}
    logger.debug(
        f'Setting up config server process with  cmd_kwargs={cmd_kwargs!r}')
    server_process = Process(target=start_server,
                             daemon=True,
                             kwargs=cmd_kwargs)

    if auto_start:
        server_process.start()

    return server_process