def sent(jd_id):
    details = srv.get_jd_details(jd_id)
    html = details.get('jd_html')
    if request.method == 'GET':
        form = TagEntryForm()
        present_tags = tag_srv.get_tagged_sentences_for_jd(jd_id).get('taggings')
        if not present_tags:
            present_tags = [{
                'sentence': sent,
                'tag': 'Tags'
            } for sent in details.get('jd_sentences')]
        for ptag in present_tags:
            st = SentenceTag()
            st.sentence = ptag.get('sentence')
            st.tag = ptag.get('tag')
            form.tag_list.append_entry(st)
        return render_template('sent.html', form=form, jd_html=Markup(html))
    else:
        out = Cut({}, sep='-')
        for key, value in request.form.to_dict().items():
            out.setdefault(key, value)
        out = dict(out)
        tags = list(out['tag_list'].values())
        tag_srv.perform_sentence_tagging(jd_id, tags)
        return render_template('sent.html', tagged=True, jd_html=Markup(html))
def add_paths_to_schema(schema):
    """
    Extracts the full path for each title in the schema and stores it on the
    corresponding object, then builds the list of full title paths.

    :param schema: Schema object that will be updated
    :return: Schema with full title paths
    """
    proxy = Cut(copy.deepcopy(schema))
    updated_items = {}
    for table in proxy["properties"].keys():
        path_item = Cut({table: copy.deepcopy(proxy["properties"][table])})
        # Generate the path to each title in the schema
        for path in generate_paths({table: proxy["properties"][table]}):
            if path[-1] == "title":
                object_path = ".".join(path[:-1])
                path_item[object_path]["$path"] = path
        updated_items[table] = path_item[table]
    proxy["properties"] = updated_items

    # Generate an array with title paths for each title in the schema
    title_paths = list(title_path(proxy["properties"]))
    for path_list in title_paths:
        location = "properties." + ".".join(path_list[-1][:-1])
        proxy[location]["$title"] = path_list
    return proxy
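# A minimal sketch of the dotted-path access the helper above relies on,
# assuming Cut is scalpl.Cut. The tiny schema below is hypothetical and only
# illustrates how a computed path string resolves into the nested dict.
from scalpl import Cut

schema = {"properties": {"award": {"title": "Award", "type": "object"}}}
proxy = Cut(schema)

# Dotted keys walk the nested dicts, so computed paths can be used directly.
assert proxy["properties.award.title"] == "Award"
proxy["properties.award.$path"] = ["award", "title"]

# The underlying dict is exposed via .data and reflects the update.
assert proxy.data["properties"]["award"]["$path"] == ["award", "title"]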
class Config(object):
    """Provides configuration for the Slow Start Rewatch."""

    def __init__(
        self,
        filename: str = DEFAULT_CONFIG_FILENAME,
    ) -> None:
        """Initialize Config."""
        log.debug("default_config_load", filename=filename)
        config_list = [os.path.join(ROOT_DIR, filename)]
        self.config = Cut(anyconfig.load(config_list))
        self._substitute_placeholders()
        self.storage = ConfigStorage(self.config["local_config_file"])

    def __getitem__(self, key):
        """Return the config item."""
        return self.config[key]

    def __setitem__(self, key, item_value) -> None:
        """Set the config item."""
        if key in self.config and self.config[key] == item_value:
            return
        log.debug("config_save", key=key, item_value=item_value)
        self.config[key] = item_value
        self.storage.save(self.config)

    def __contains__(self, key):
        """Return true if an item exists in the config."""
        return key in self.config

    def load(self) -> None:
        """Load config items from the local storage."""
        self.config.update(self.storage.load())

    def _substitute_placeholders(self) -> None:
        """Substitute the placeholders in the config."""
        mapping = {
            "home_dir": HOME_DIR,
            "ps": os.path.sep,
            "version": version(),
        }
        keys = [
            "data_dir",
            "schedule_file",
            "local_config_file",
            "reddit.user_agent",
        ]
        log.debug("config_substitute", mapping=mapping, keys=keys)
        for key in keys:
            self.config[key] = Template(self.config[key]).safe_substitute(
                mapping,
            )
def __init__(self, config: dict = {}, datadir: str = None):
    self.shutdown_event: threading.Event = threading.Event()
    self.helper = OpenCTIConnectorHelper(config)
    logger.info("Connected to OpenCTI")

    if (self.helper.connect_live_stream_id is None
            or self.helper.connect_live_stream_id == "ChangeMe"):
        raise ValueError("Missing Live Stream ID")

    self.config = Cut(config)

    # Start streaming from 1 second ago
    self.helper.set_state({
        "connectorLastEventId": str(int(round(time.time() * 1000)) - 1000)
    })

    # Get the external URL as configured in the OpenCTI Settings page
    query = """
    query SettingsQuery {
        settings {
            id
            platform_url
        }
    }
    """
    _settings = self.helper.api.query(query)["data"]["settings"]
    self.config["opencti.platform_url"] = _settings.get("platform_url", None)

    self._connect_elasticsearch()

    if self.config["connector.mode"] == "ecs":
        self.import_manager = IntelManager(self.helper, self.elasticsearch,
                                           self.config, datadir)
        self.sightings_manager = SignalsManager(
            config=self.config,
            shutdown_event=self.shutdown_event,
            opencti_client=self.helper,
            elasticsearch_client=self.elasticsearch,
        )
    elif self.config["connector.mode"] == "stix":
        self.import_manager = StixManager(self.helper, self.elasticsearch,
                                          self.config, datadir)
        self.sightings_manager = None
    else:
        logger.error(
            f"connector.mode: {self.config['connector.mode']} is unsupported. Should be 'ecs' or 'stix'"
        )
def __init__(
    self,
    filename: str = DEFAULT_CONFIG_FILENAME,
) -> None:
    """Initialize Config."""
    log.debug("default_config_load", filename=filename)
    config_list = [os.path.join(ROOT_DIR, filename)]
    self.config = Cut(anyconfig.load(config_list))
    self._substitute_placeholders()
    self.storage = ConfigStorage(self.config["local_config_file"])
def initialize_sentry() -> None:
    """Initialize Sentry monitoring.

    To enable Sentry, define the setting "project.sentry_dsn"
    or the environment variable SENTRY_ENDPOINT.
    """
    endpoint = settings["project.sentry_dsn"]
    if not endpoint:
        logger.warning("Sentry not initialized.")
        return

    toml_path = Path().cwd().joinpath("pyproject.toml")
    with open(str(toml_path), "r") as file:
        data = Cut(toml.load(file))
        current_version = data["tool.poetry.version"]
    try:
        sentry_sdk.init(
            dsn=endpoint,
            integrations=[AwsLambdaIntegration()],
            environment=settings.stela_options.current_environment,
            release=current_version,
        )
        logger.info("Sentry initialized.")
    except BadDsn as error:
        logger.error(f"Error when initializing Sentry: {error}")
def test_get_order_price_min_days(client, all_headers, _items_data_min_days):
    cart_items_url = url_for("api.cart_items")
    resp = client.post(
        cart_items_url,
        json={"items": _items_data_min_days},
        headers=all_headers["customer_headers"])
    assert resp.status_code == 201

    resp_data = Cut(resp.json)
    cart_id = resp_data["data.cart.id"]

    order_price_url = url_for("api.order_price", cart_id=cart_id)
    resp = client.get(order_price_url, headers=all_headers["customer_headers"])
    assert resp.status_code == 200
    assert resp.json["cost_usd"] == sum([
        MIN_FICTION_COST, MIN_NOVEL_COST, MIN_REGULAR_COST
    ])
async def get_users(self, search="", country="", state="", state_code="",
                    city="", school_id="", tool_id="", sort="recommended",
                    ordinal="", **kwargs):
    res = Cut(await self.search(content="users",
                                search=search,
                                country=country,
                                state=state,
                                stateCode=state_code,
                                city=city,
                                schools=school_id,
                                tools=tool_id,
                                sort=sort,
                                ordinal=ordinal,
                                **kwargs))
    return res['search.content.users'], {
        "hasMore": res['search.hasMore'],
        "itemsPerPage": res['search.itemsPerPage'],
        "nextOrdinal": res['search.nextOrdinal'],
        "nextPage": res["search.nextPage"]
    }
def test_get_order_price(client, _items_data, all_headers):
    cart_items_url = url_for("api.cart_items")
    resp = client.post(cart_items_url,
                       json={"items": _items_data},
                       headers=all_headers["customer_headers"])
    resp_data = Cut(resp.json)
    cart_id = resp_data["data.cart.id"]

    order_price_url = url_for("api.order_price", cart_id=-10)
    resp = client.get(order_price_url, headers=all_headers["customer_headers"])
    assert resp.status_code == 404

    order_price_url = url_for("api.order_price", cart_id=cart_id)
    resp = client.get(order_price_url, headers=all_headers["no_auth_headers"])
    assert resp.status_code == 401

    resp = client.get(order_price_url, headers=all_headers["customer_headers"])
    assert resp.status_code == 200

    fiction_cost = TWO_DAYS * MIN_FICTION_COST
    novel_cost = FIVE_DAYS * 1.5
    regular_cost = FOUR_DAYS * 1.5
    sum_book_cost = fiction_cost + novel_cost + regular_cost
    assert resp.json["cost_usd"] == sum_book_cost
def _get_header(self, id, paths):
    final_title = []
    for path in paths:
        _object = Cut(self.schema)["properties." + ".".join(path[:-1])]
        if hasattr(_object, "__reference__") and "title" in _object.__reference__:
            title = _object.__reference__["title"]
        else:
            title = Cut(self.schema)["properties." + ".".join(path)]
        if isinstance(title, dict):
            continue
        final_title.append(title)
    if id.startswith("/documents"):
        final_title = final_title[3:]
    if "Organization reference" in final_title:
        final_title.remove("Organization reference")
    return ": ".join(final_title)
def test_schema_header_paths(schema):
    paths = generate_paths(schema["properties"])
    schema = add_paths_to_schema(schema)
    proxy = Cut(schema["properties"])
    for path in paths:
        if path[-1] == "title":
            _path = ".".join(path[:-1])
            assert "$title" in proxy[_path]
            assert proxy[_path]["$title"][-1] == path
def get_status_unshorturls(status_id: int):
    status_data = _get_status_ES(status_id)
    if status_data is not None:
        proxy = Cut(status_data)
        unshorturls(proxy)
        _update_status_ES(status_id, proxy.data)
        return True
    else:
        return None
def config_server_args(config_path):
    loaded_config = load_config(config_files=config_path, ignore_local=True)
    return {
        'config_file': config_path,
        'auto_save': False,
        'ignore_local': True,
        'POCS': loaded_config,
        'POCS_cut': Cut(loaded_config)
    }
def _get_base_status(id: int):
    status = Cut()
    status['@data_source'] = 'https://mobile.twitter.com'
    status['id'] = id
    status['id_str'] = str(id)
    status['@updated_at'] = arrow.utcnow().format(LONG_DATETIME_PATTERN) + "Z"
    return status.data
def _update_status_key_ES(status_id: int, key: str, data: dict):
    status_data = _get_status_ES(status_id)
    if status_data is not None:
        proxy = Cut(status_data)
        proxy[key] = data
        _update_status_ES(status_id, proxy.data)
        return True
    else:
        return None
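# A small sketch of the read-modify-write pattern used by the _update_* and
# get_status_* helpers above: wrap the fetched document in Cut, change a nested
# field through a dotted key, then persist the plain dict via .data.
# (Assumes Cut is scalpl.Cut; the document below is a made-up example.)
from scalpl import Cut

status_data = {"user": {"screen_name": "example"}, "lang": "en"}
proxy = Cut(status_data)
proxy["user.screen_name"] = "renamed"             # nested update through a dotted key
assert proxy.get("user.missing", "n/a") == "n/a"  # .get() with a default, like dict.get
plain_dict = proxy.data                           # what _update_status_ES would index
assert plain_dict["user"]["screen_name"] == "renamed"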
def get_status_sheldon_score(status_id: int):
    status_data = _get_status_ES(status_id)
    if status_data is not None:
        proxy = Cut(status_data)
        sheldon_score = get_sheldon_score(proxy['full_text'], proxy['lang'])
        proxy['sheldon_score'] = sheldon_score
        _update_status_ES(status_id, proxy.data)
        return True
    else:
        return None
async def get_user_projects(self, username, offset=12, full_res=False):
    res = await self.get(f"/{username}/projects", params={"offset": offset})
    if full_res:
        return await res.json()
    proxy = Cut(await res.json())
    return (proxy['profile.activeSection.work.projects'],
            proxy['profile.activeSection.work.hasMore'])
def config_server(host='localhost',
                  port=6563,
                  config_file=None,
                  ignore_local=False,
                  auto_save=False,
                  auto_start=True,
                  debug=False):
    """Start the config server in a separate process.

    A convenience function to start the config server.

    Args:
        host (str, optional): Name of host, default 'localhost'.
        port (int, optional): Port for server, default 6563.
        config_file (str|None, optional): The config file to load, defaults to
            `$PANDIR/conf_files/pocs.yaml`.
        ignore_local (bool, optional): If local config files should be ignored,
            default False.
        auto_save (bool, optional): If setting new values should auto-save to
            local file, default False.
        auto_start (bool, optional): If server process should be started
            automatically, default True.
        debug (bool, optional): Flask server debug mode, default False.

    Returns:
        `multiprocessing.Process`: The process running the config server.
    """
    app.config['auto_save'] = auto_save
    app.config['config_file'] = config_file
    app.config['ignore_local'] = ignore_local
    app.config['POCS'] = load_config(config_files=config_file,
                                     ignore_local=ignore_local)
    app.config['POCS_cut'] = Cut(app.config['POCS'])

    def start_server(**kwargs):
        try:
            app.run(**kwargs)
        except OSError:
            warn('Problem starting config server, '
                 'is another config server already running?')
            return None

    server_process = Process(target=start_server,
                             kwargs=dict(host=host, port=port, debug=debug),
                             name='panoptes-config-server')

    if server_process is not None and auto_start:
        try:
            server_process.start()
        except KeyboardInterrupt:
            server_process.terminate()

    return server_process
def _load_job_es(job):
    job_file = job[0]
    job_es = job[1]
    job_idx = job[2]
    with open(job_file) as open_file:
        cur_json = Cut(json.load(open_file))
        cur_json['updated_at'] = str(arrow.utcnow())  # arrow.get().format("YYYY-MM-DDTHH:MM:SS")+"Z"
        cur_json_id = cur_json['user.screen_name']
        job_es.index(index=job_idx,
                     doc_type='res',
                     id=cur_json_id,
                     body=json.dumps(cur_json.data))
def __init__(
    self,
    helper: OpenCTIConnectorHelper,
    elasticsearch_client: Elasticsearch,
    config: dict[str, str],
    datadir: str,
):
    self.helper: OpenCTIConnectorHelper = helper
    self.es_client: Elasticsearch = elasticsearch_client
    self.config: Cut = Cut(config)
    self.datadir: str = datadir

    self.idx: str = self.config.get("output.elasticsearch.index")
    self.idx_pattern: str = self.config.get("setup.template.pattern")

    self.pattern: re.Pattern = re.compile(RE_DATEMATH)

    self._setup_elasticsearch_index()
def reset_config():
    """Reset the configuration.

    An endpoint that accepts a POST method. The json request object
    must contain the key ``reset`` (with any value).

    The method will reset the configuration to the original configuration
    files that were used, skipping the local (and saved) file.

    .. note::

        If the server was originally started with a local version of the file,
        those will be skipped upon reload. This is not ideal but hopefully this
        method is not used too much.

    Returns:
        str: A json string object containing the keys ``success`` and ``msg``
        that indicate success or failure.
    """
    params = dict()
    if request.method == 'GET':
        params = request.args
    elif request.method == 'POST':
        params = request.get_json()

    logger.warning('Resetting config server')

    if params['reset']:
        # Reload the config
        config = load_config(config_files=app.config['config_file'],
                             load_local=app.config['load_local'])

        # Add an entry to control running of the server.
        config['config_server'] = dict(running=True)

        app.config['POCS'] = config
        app.config['POCS_cut'] = Cut(config)
    else:
        return jsonify({
            'success': False,
            'msg': "Invalid. Need json request: {'reset': True}"
        })

    return jsonify({
        'success': True,
        'msg': 'Configuration reset'
    })
def test_create_user(client, db):
    # test bad data
    register_user_url = url_for('auth.register_user')
    data = {"username": "******"}
    rep = client.post(register_user_url, json=data)
    assert rep.status_code == 400

    data["password"] = "******"
    data["email"] = "*****@*****.**"

    resp = client.post(register_user_url, json=data)
    assert resp.status_code == 201

    data = Cut(resp.get_json())
    id_ = data["user.id"]
    user = db.session.query(User).filter_by(id=id_).first()

    assert user.username == "created"
    assert user.email == "*****@*****.**"
def reset_config():
    if request.is_json:
        get_root_logger().warning('Resetting config server')
        req_data = request.get_json()

        if req_data['reset']:
            # Reload the config
            app.config['POCS'] = load_config(
                config_files=app.config['config_file'],
                ignore_local=app.config['ignore_local'])
            app.config['POCS_cut'] = Cut(app.config['POCS'])

        return jsonify(req_data)

    return jsonify({
        'success': False,
        'msg': "Invalid. Need json request: {'reset': True}"
    })
def __init__(
    self,
    config: dict,
    shutdown_event: Event,
    opencti_client: OpenCTIConnectorHelper,
    elasticsearch_client: Elasticsearch,
) -> None:
    super(SignalsManager, self).__init__()

    self.config: Cut = Cut(config)
    self.shutdown_event: Event = shutdown_event
    self.es_client: Elasticsearch = elasticsearch_client
    self.helper: OpenCTIConnectorHelper = opencti_client

    self.author_id = None

    # Default to 5 minutes
    self.interval = 300
    _interval: str = self.config.get("elastic.signals.query_interval", "5m")
    _dur: timedelta = parse_duration(_interval)
    if _dur is not None:
        self.interval = _dur.total_seconds()

    self.search_idx = self.config.get("elastic.signals.signal_index",
                                      ".siem-signals-*")

    _query: dict = json.loads(
        self.config.get("elastic.signals.query", DEFAULT_QUERY))
    _lookback: str = self.config.get("elastic.signals.lookback_interval",
                                     DEFAULT_LOOKBACK)

    assert self.es_client.ping()

    self.signals_search: dict = (Search(
        using=self.es_client, index=self.search_idx).from_dict(_query).filter(
            "range", **{
                "@timestamp": {
                    "gte": f"now-{_lookback}/m",
                    "lt": "now/m"
                }
            }).to_dict())

    logger.info("Signals manager thread initialized")
def translate(value, lang_pair='spa|eng'):
    """
    Calls the Apertium APY service to obtain a translation.

    @value: input string
    @lang_pair: identifier of the translation pair
    """
    data = [
        ('langpair', lang_pair),
        ('q', value),
        ('markUnknown', 'no'),
    ]
    # !TODO Get YML config parameter with the Apertium URL
    res = requests.post('http://localhost:2737/translate', data=data)
    if res.status_code == 200:
        json_res = json.loads(res.content.decode('utf-8'))
        spa_chunk_str = Cut(json_res)['responseData.translatedText']
        return spa_chunk_str
    else:
        return None
def _get_botometer_api_response(screen_name: str, status_id: int):
    # Another task may already have loaded fresh Botometer data (duplicated task)
    res = __check_botometer(screen_name)
    if res is False:
        settings = Settings()
        es = Elasticsearch(settings.ELASTICSEARCH_URL)
        twitter_app_auth = {
            'consumer_key': settings.CONSUMER_KEY,
            'consumer_secret': settings.CONSUMER_SECRET,
            'access_token': settings.ACCESS_TOKEN_KEY,
            'access_token_secret': settings.ACCESS_TOKEN_SECRET,
        }
        # !TODO look for a better place for this, or document it
        logger.debug("Init Botometer API 🤖")
        botometer_api_url = 'https://botometer-pro.p.mashape.com'
        bom = botometer.Botometer(botometer_api_url=botometer_api_url,
                                  wait_on_ratelimit=True,
                                  mashape_key=settings.BOTOMETER_KEY,
                                  **twitter_app_auth)

        logger.debug("Calling Botometer API 🤖: %s" % screen_name)
        res = Cut(bom.check_account(screen_name))
        res['updated_at'] = str(arrow.utcnow())
        cur_json = json.dumps(res.data, indent=4)
        save_json(cur_json,
                  settings.BOTOMETER_JSON_BACKUP + "%s.json" % res['user.id_str'])

        logger.info("Indexing Botometer info for screen name: %s" % screen_name)
        es.index(index=settings.ELASTICSEARCH_BOT_INDEX,
                 doc_type='res',
                 id=screen_name,
                 body=cur_json)

        logger.info("Indexing Status updated info for screen name: %s" % screen_name)
        _update_status_key_ES(status_id, 'user.botometer', res.data)
    elif type(res) == dict:
        _update_status_key_ES(status_id, 'user.botometer', res)
def __init__(
    self,
    helper: OpenCTIConnectorHelper,
    elasticsearch_client: Elasticsearch,
    config: dict[str, str],
    datadir: str,
):
    self.helper: OpenCTIConnectorHelper = helper
    self.es_client: Elasticsearch = elasticsearch_client
    self.config: Cut = Cut(config)
    self.datadir: str = datadir

    self.idx: str = self.config.get("output.elasticsearch.index")
    self.idx_pattern: str = self.config.get("setup.template.pattern")

    self.write_idx: str = self.config.get("output.elasticsearch.index")
    if self.config.get("setup.ilm.enabled", False) is True:
        self.write_idx = self.config.get("setup.ilm.rollover_alias", "opencti")

    self.pattern = re.compile(RE_DATEMATH)

    self._setup_elasticsearch_index()
class ElasticConnector:
    def __init__(self, config: dict = {}, datadir: str = None):
        self.shutdown_event: threading.Event = threading.Event()
        self.helper = OpenCTIConnectorHelper(config)
        logger.info("Connected to OpenCTI")

        if (self.helper.connect_live_stream_id is None
                or self.helper.connect_live_stream_id == "ChangeMe"):
            raise ValueError("Missing Live Stream ID")

        self.config = Cut(config)

        # Start streaming from 1 second ago
        self.helper.set_state({
            "connectorLastEventId": str(int(round(time.time() * 1000)) - 1000)
        })

        # Get the external URL as configured in the OpenCTI Settings page
        query = """
        query SettingsQuery {
            settings {
                id
                platform_url
            }
        }
        """
        _settings = self.helper.api.query(query)["data"]["settings"]
        self.config["opencti.platform_url"] = _settings.get("platform_url", None)

        self._connect_elasticsearch()

        if self.config["connector.mode"] == "ecs":
            self.import_manager = IntelManager(self.helper, self.elasticsearch,
                                               self.config, datadir)
            self.sightings_manager = SignalsManager(
                config=self.config,
                shutdown_event=self.shutdown_event,
                opencti_client=self.helper,
                elasticsearch_client=self.elasticsearch,
            )
        elif self.config["connector.mode"] == "stix":
            self.import_manager = StixManager(self.helper, self.elasticsearch,
                                              self.config, datadir)
            self.sightings_manager = None
        else:
            logger.error(
                f"connector.mode: {self.config['connector.mode']} is unsupported. Should be 'ecs' or 'stix'"
            )

    def _connect_elasticsearch(self) -> None:
        _apikey: tuple(str) = None
        _httpauth: tuple(str) = None

        if self.config.get("cloud.auth", None):
            _httpauth = tuple(self.config.get("cloud.auth").split(":"))
        elif self.config.get("output.elasticsearch.username",
                             None) and self.config.get(
                                 "output.elasticsearch.password", None):
            _httpauth = (
                self.config.get("output.elasticsearch.username"),
                self.config.get("output.elasticsearch.password"),
            )

        if self.config.get("output.elasticsearch.api_key", None):
            _apikey = tuple(
                self.config.get("output.elasticsearch.api_key").split(":"))

        if _httpauth is not None and _apikey is not None:
            logger.critical(
                "Either username/password auth or api_key auth should be used for Elasticsearch, not both."
            )
            sys.exit(1)

        if self.config.get("cloud.id", None):
            logger.debug(
                f"Connecting to Elasticsearch using cloud.id {self.config.get('cloud.id')}"
            )
            self.elasticsearch = Elasticsearch(
                cloud_id=self.config.get("cloud.id"),
                verify_certs=self.config.get("output.elasticsearch.ssl_verify",
                                             True),
                http_auth=_httpauth,
                api_key=_apikey,
            )
        else:
            logger.debug(
                f"Connecting to Elasticsearch using hosts: {self.config.get('output.elasticsearch.hosts', ['localhost:9200'])}"
            )
            self.elasticsearch = Elasticsearch(
                hosts=self.config.get("output.elasticsearch.hosts",
                                      ["localhost:9200"]),
                verify_certs=self.config.get("output.elasticsearch.ssl_verify",
                                             True),
                http_auth=_httpauth,
                api_key=_apikey,
            )

        logger.info("Connected to Elasticsearch")
        return

    def handle_create(self, timestamp: datetime, data: dict) -> None:
        logger.debug("[CREATE] Processing indicator {" + data["id"] + "}")
        self.import_manager.import_cti_event(timestamp, data)
        return

    def handle_update(self, timestamp, data):
        logger.debug("[UPDATE] Processing indicator {" + data["id"] + "}")
        self.import_manager.import_cti_event(timestamp, data, is_update=True)
        return

    def handle_delete(self, timestamp, data):
        logger.debug("[DELETE] Processing indicator {" + data["id"] + "}")
        self.import_manager.delete_cti_event(data)
        return

    def _process_message(self, msg) -> None:
        logger.debug("_process_message")
        try:
            event_id = msg.id
            timestamp = datetime.fromtimestamp(round(
                int(event_id.split("-")[0]) / 1000),
                                               tz=timezone.utc)
            data = json.loads(msg.data)["data"]
        except ValueError:
            logger.error(f"Unable to process the message: {msg}")
            raise ValueError("Cannot process the message: " + msg)

        logger.debug(f"[PROCESS] Message (id: {event_id}, date: {timestamp})")

        if msg.event == "create":
            return self.handle_create(timestamp, data)
        if msg.event == "update":
            return self.handle_update(timestamp, data)
        if msg.event == "delete":
            return self.handle_delete(timestamp, data)

    def start(self) -> None:
        self.shutdown_event.clear()

        if self.config["connector.mode"] == "ecs":
            self.sightings_manager.start()

        # Look out, this doesn't block
        self.helper.listen_stream(self._process_message)

        try:
            # Just wait here until someone presses ctrl+c
            self.shutdown_event.wait()
        except KeyboardInterrupt:
            self.shutdown_event.set()
            logger.info("Shutting down")

        if self.config["connector.mode"] == "ecs":
            self.sightings_manager.join(timeout=3)
            if self.sightings_manager.is_alive():
                logger.warn("Sightings manager didn't shutdown by request")

        self.elasticsearch.close()
        logger.info(
            "Main thread complete. Waiting on background threads to complete. Press CTRL+C to quit."
        )
def _prepare_json_status(status, unshort_urls=True, sheldon=True):
    """
    Improves status info from Twitter and applies fixes for better indexing.

    Arguments:
        status {dict} -- Dict with the Twitter status

    Keyword Arguments:
        unshort_urls {bool} -- Enables/disables URL unshortening; when disabled,
            loading is much faster (no URL resolution) (default: {True})

    Returns:
        [str] -- JSON output of the post-processed status
    """
    settings = Settings()

    if type(status) is dict:
        proxy = Cut(status)
    else:
        proxy = Cut(status.AsDict())

    id_str = proxy['id_str']

    # Fix Twitter dates to a more standard date format
    list_all_keys_w_dots = dotter(proxy.data, '', [])
    try:
        for created_at_keys in list_all_keys_w_dots:
            if 'created_at' in created_at_keys:
                # If it matches, the date was already fixed in an earlier process
                if FIXED_TWITTER_DATE_TIME.match(proxy[created_at_keys]) is None:
                    cur_dt = arrow.get(proxy[created_at_keys], TWITTER_DATETIME_PATTERN)
                    proxy[created_at_keys] = cur_dt.format("YYYY-MM-DDTHH:MM:SS") + "Z"
    except:
        import ipdb; ipdb.set_trace()
        logger.warning("Error parsing dates on %s" % id_str)

    # Fix source
    try:
        proxy["source_href"] = _get_source_href(proxy["source"])
        proxy["source_desc"] = _get_source_href_desc(proxy["source"])
    except:
        logger.warning("Error fixing source getting href")

    # Fix geolocations
    # If len(proxy['xxxx.coordinates']) == 1 it means there is no lat/lon, probably a geohash
    # The Twitter API says:
    #   The longitude and latitude of the Tweet's location, as a collection in the form
    #   [longitude, latitude]. Example: "coordinates":[-97.51087576,35.46500176]
    # Geohash lib input function: def encode(latitude, longitude, precision=12):
    if 'coordinates.coordinates' in proxy and len(proxy['coordinates.coordinates']) > 1:
        proxy['coordinates.coordinates'] = geohash2.encode(
            proxy['coordinates.coordinates'][1],
            proxy['coordinates.coordinates'][0])

    if 'geo.coordinates' in proxy and len(proxy['geo.coordinates']) > 1:
        proxy['geo.coordinates'] = geohash2.encode(
            proxy['geo.coordinates'][0],
            proxy['geo.coordinates'][1])

    KEY_PLACE_BB = 'place.bounding_box.coordinates'
    if KEY_PLACE_BB in proxy:
        centroid_bb_data = []
        for i in range(0, len(proxy[KEY_PLACE_BB])):
            for j in range(0, len(proxy[KEY_PLACE_BB + "[%d]" % i])):
                cur_key_bb_ij = KEY_PLACE_BB + "[%d][%d]" % (i, j)
                c_lat = proxy[cur_key_bb_ij][1]
                c_lon = proxy[cur_key_bb_ij][0]
                centroid_bb_data += [(float(c_lat), float(c_lon))]
                proxy[cur_key_bb_ij] = geohash2.encode(c_lat, c_lon)

        # Create a new point with the centroid
        if len(centroid_bb_data) > 0:
            centroid_bb_arr = np.array(centroid_bb_data)
            centroid_bb = centeroidnp(centroid_bb_arr)
            proxy['place.bounding_box_centroid'] = geohash2.encode(
                centroid_bb[0], centroid_bb[1])

    # Check and fix shortened URLs in the expanded field
    if unshort_urls:
        get_status_unshorturls(proxy['id'])

    # Get sheldon score
    if sheldon:
        # !TODO Transform ISO
        # proxy['sheldon_score'] = get_sheldon_score(proxy['full_text'], proxy['lang'], proxy['id'])
        get_status_sheldon_score(proxy['id'])

    # Get Botometer
    # !TODO Add ES search for botometer info
    if settings.BOTOMETER_KEY is not None:
        proxy['user.botometer'] = get_botometer(proxy['user.screen_name'], proxy['id'])

    return json.dumps(proxy.data, indent=4)
def config_server(
    config_file,
    host=None,
    port=None,
    load_local=True,
    save_local=False,
    auto_start=True,
    access_logs=None,
    error_logs='logger',
):
    """Start the config server in a separate process.

    A convenience function to start the config server.

    Args:
        config_file (str or None): The absolute path to the config file to load.
            Checks for the PANOPTES_CONFIG_FILE env var and fails if not provided.
        host (str, optional): The config server host. First checks for the
            PANOPTES_CONFIG_HOST env var, defaults to 'localhost'.
        port (str or int, optional): The config server port. First checks for the
            PANOPTES_CONFIG_PORT env var, defaults to 6563.
        load_local (bool, optional): If local config files should be used when
            loading, default True.
        save_local (bool, optional): If setting new values should auto-save to
            local file, default False.
        auto_start (bool, optional): If server process should be started
            automatically, default True.
        access_logs ('default' or 'logger' or `File`-like or None, optional):
            Controls access logs for the gevent WSGIServer. The `default` string
            will cause access logs to go to stderr. The string `logger` will use
            the panoptes logger. A File-like will write to file. The default
            `None` will turn off all access logs.
        error_logs ('default' or 'logger' or `File`-like or None, optional):
            Same as `access_logs` except we use our `logger` as the default.

    Returns:
        multiprocessing.Process: The process running the config server.
    """
    config_file = config_file or os.environ['PANOPTES_CONFIG_FILE']
    logger.info(f'Starting panoptes-config-server with config_file={config_file!r}')
    config = load_config(config_files=config_file, load_local=load_local)
    logger.success(f'Config server loaded {len(config)} top-level items')

    # Add an entry to control running of the server.
    config['config_server'] = dict(running=True)
    logger.success(f'{config!r}')

    cut_config = Cut(config)

    app.config['config_file'] = config_file
    app.config['save_local'] = save_local
    app.config['load_local'] = load_local
    app.config['POCS'] = config
    app.config['POCS_cut'] = cut_config
    logger.info('Config items saved to flask config-server')

    # Set up access and error logs for server.
    access_logs = logger if access_logs == 'logger' else access_logs
    error_logs = logger if error_logs == 'logger' else error_logs

    def start_server(host='localhost', port=6563):
        try:
            logger.info(f'Starting panoptes config server with {host}:{port}')
            http_server = WSGIServer((host, int(port)),
                                     app,
                                     log=access_logs,
                                     error_log=error_logs)
            http_server.serve_forever()
        except OSError:
            logger.warning('Problem starting config server, '
                           'is another config server already running?')
            return None
        except Exception as e:
            logger.warning(f'Problem starting config server: {e!r}')
            return None

    host = host or os.getenv('PANOPTES_CONFIG_HOST', 'localhost')
    port = port or os.getenv('PANOPTES_CONFIG_PORT', 6563)
    cmd_kwargs = dict(host=host, port=port)
    logger.debug(f'Setting up config server process with cmd_kwargs={cmd_kwargs!r}')
    server_process = Process(target=start_server, daemon=True, kwargs=cmd_kwargs)

    if auto_start:
        server_process.start()

    return server_process