def is_alive(self):
    """Check whether the database connection is alive."""
    try:
        return database_exists(self.uri)
    except Exception as exc:
        logger.error(exc, exc_info=True)
        return False
def _fetch_obj_data(msg):
    if not msg.get('wsid') or not msg.get('objid'):
        raise RuntimeError(f'Cannot get object ref from msg: {msg}')
    obj_ref = f"{msg['wsid']}/{msg['objid']}"
    if msg.get('ver'):
        obj_ref += f"/{msg['ver']}"
    try:
        obj_data = config()['ws_client'].admin_req(
            'getObjects', {'objects': [{'ref': obj_ref}]})
    except WorkspaceResponseError as err:
        log_error(err)
        # Workspace is deleted; ignore the error
        if (err.resp_data and isinstance(err.resp_data, dict)
                and isinstance(err.resp_data.get('error'), dict)
                and err.resp_data['error'].get('code') == -32500):
            return None
        raise err
    # Validate the response before indexing into it
    if not obj_data or not obj_data.get('data') or not obj_data['data'][0]:
        logger.error(obj_data)
        raise RuntimeError("Invalid object result from the workspace")
    return obj_data['data'][0]
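# Example of the event message shape consumed by _fetch_obj_data (and by
# _fetch_ws_info below). The values here are hypothetical:
#   {'evtype': 'NEW_VERSION', 'wsid': 100, 'objid': 5, 'ver': 2}
# which yields the object reference '100/5/2'.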
def all(self, session):
    result = session.query(Series).all()
    obj, err = self.dump(result, many=True)
    if not err:
        return obj
    logger.error("ERROR: {}".format(err))
    return []
def run(self): try: logger.info("start wuhan vol crawler.") self.parse() logger.info("end wuhan vol crawler.") except Exception, e: logger.error("error", e)
def produce(data: Any,
            topic: str = config()['topics']['admin_events'],
            callback: Callable = None) -> None:
    """
    Produce a new event message on a Kafka topic and block until it is published.

    If the produce fails with a BufferError, it is retried up to
    _KAFKA_PRODUCE_RETRIES times (defaults to 5).

    Args:
        data: the data to send to Kafka. Must be JSON-serializable.
        topic: the topic where the data will be sent.
        callback: a callable passed through to the confluent Kafka Producer.
    """
    producer = Producer({'bootstrap.servers': config()['kafka_server']})
    tries = 0
    while True:
        try:
            producer.produce(topic, json.dumps(data), callback=callback)
            producer.flush()
            break
        except BufferError:
            if tries == _KAFKA_PRODUCE_RETRIES:
                raise RuntimeError(
                    "Unable to produce a Kafka message due to BufferError")
            logger.error(
                "Received a BufferError trying to produce a message on Kafka. Retrying...")
            tries += 1
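# A minimal usage sketch for produce(), assuming the surrounding module's
# config() supplies 'kafka_server' and 'topics'. The message shape mirrors
# the REINDEX events emitted by _handle_msg below; the ids are hypothetical.
if __name__ == '__main__':
    produce({'evtype': 'REINDEX', 'wsid': 100, 'objid': 5})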
def run(self): try: logger.info("start bj gov crawler.") self.get_wangqian() self.get_history() logger.info("end bj gov crawler.") except Exception, e: logger.error("error", e)
def run(self): try: logger.info("start lianjia crawler") self.craw_stat() self.craw_open() self.crawPriceTrends() logger.info("end lianjia crawler") except Exception, e: logger.error("error", e)
def run(self): try: logger.info("start hangzhou vol crawler.") prev_url = self.get_month_vol(datetime.date.today() - datetime.timedelta(days=1), None) while prev_url: prev_url = self.get_month_vol(None, prev_url) time.sleep(6) logger.info("end hangzhou vol crawler.") except: logger.error("error")
def _fetch_ws_info(msg):
    if not msg.get('wsid'):
        raise RuntimeError(f'Cannot get workspace info from msg: {msg}')
    try:
        ws_info = config()['ws_client'].admin_req(
            'getWorkspaceInfo', {'id': msg['wsid']})
    except WorkspaceResponseError as err:
        logger.error(f'Workspace response error: {err.resp_data}')
        raise err
    return ws_info
def run(self): try: logger.info("start draw img.") self.draw_vol_1days() self.draw_vol_7days() self.draw_vol_monthly() self.draw_price_trends() logger.info("end draw img.") except: logger.error("error.")
def all(self, session):
    result = session.query(Tournaments, Series).join(Series).all()
    series_schema = SeriesSchema()
    response = []
    for t_row, s_row in result:
        t_obj, t_err = self.dump(t_row)
        s_obj, s_err = series_schema.dump(s_row)
        if not t_err and not s_err:
            t_obj['series'] = s_obj
            response.append(t_obj)
        else:
            logger.error("Tournament Error: {}, Series Error: {}".format(
                t_err, s_err))
    return response
def _handle_es_err(resp):
    """Handle a non-2xx response from Elasticsearch."""
    logger.error(f"Elasticsearch response error:\n{resp.text}")
    try:
        resp_json = resp.json()
    except Exception:
        raise ElasticsearchError(resp.text)
    err_type = get_path(resp_json, ['error', 'root_cause', 0, 'type'])
    err_reason = get_path(resp_json, ['error', 'reason'])
    if err_type is None:
        raise ElasticsearchError(resp.text)
    if err_type == 'index_not_found_exception':
        raise UnknownIndex(err_reason)
    raise ElasticsearchError(err_reason)
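# A minimal sketch of how _handle_es_err might be wired into a request
# helper. The base URL and index path here are hypothetical, not taken
# from the real config.
def _es_get(path):
    resp = requests.get(f'http://localhost:9200{path}')
    if not resp.ok:
        _handle_es_err(resp)  # raises UnknownIndex or ElasticsearchError
    return resp.json()

# e.g. _es_get('/example_index/_count')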
def login():
    if request.method == "POST":
        try:
            form_data = request.form
            data_validation(form_data, ['username', 'password'])
            user = authenticate_user(form_data.get('username'),
                                     form_data.get('password'))
            if not user:
                raise HTTPError(401)
            api_session['token'] = user.get('user_secret')
            return redirect(url_for('index'))
        except Exception as exc:
            logger.error(str(exc))
            return render_template('login.html')
    else:
        return render_template('login.html')
def wait_for_service(url, name, timeout=DEFAULT_TIMEOUT):
    start = time.time()
    while True:
        logger.info(f'Attempting to connect to {name} at {url}')
        try:
            requests.get(url, timeout=timeout).raise_for_status()
            logger.info(f'{name} is online!')
            break
        except Exception:
            logger.info(f'Waiting for {name} at {url}')
            total_elapsed = time.time() - start
            if total_elapsed > timeout:
                logger.error(
                    f'Unable to connect to {name} at {url} after {total_elapsed} seconds')
                exit(1)
            time.sleep(WAIT_POLL_INTERVAL)
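# A usage sketch with a hypothetical service URL: wait_for_service blocks
# until the service answers with a 2xx status, or exits the process once
# the timeout has elapsed.
if __name__ == '__main__':
    wait_for_service('http://localhost:9200', 'Elasticsearch')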
def filter_by_date_range(self, session, date_start, date_end):
    result = session.query(Tournaments, Series).join(Series)\
        .filter(Tournaments.date_start >= date_start)\
        .filter(Tournaments.date_end <= date_end)\
        .all()
    series_schema = SeriesSchema()
    response = []
    for t_row, s_row in result:
        t_obj, t_err = self.dump(t_row)
        s_obj, s_err = series_schema.dump(s_row)
        if not t_err and not s_err:
            t_obj['series'] = s_obj
            response.append(t_obj)
        else:
            logger.error("Tournament Error: {}, Series Error: {}".format(
                t_err, s_err))
    return response
def run(self): try: logger.info("start shanghai vol crawler.") html = download(self.url) soup = BeautifulSoup(html, "html.parser") soup.find("div", {}) match = re.findall("出售各类商品房<b>(\\d+)</b>套", html) if match: vol = match[0] ds = re.findall("今日楼市((\\d+)-(\\d+)-(\\d+))", html) date = datetime.now().replace(year=int(ds[0][0]), month=int(ds[0][1]), day=int(ds[0][2])) info = {"city": "上海", "district": "sh", "total": vol, "zhuzai": 0, "date": date} has = self.dao.has_item("sh", date) if not has[0]: self.dao.insert_item(info) logger.info("end shanghai vol crawler.") except Exception, e: logger.error(e)
def post(self):
    try:
        payload = request.get_json()
        calc = CalculatorArgs(payload)
        calc.validate()
        rbmq.insert(calc)
        return {'status': 'ok', 'msg': 'Job inserted on queue'}, http.HTTPStatus.OK
    except (ValidationError, ModelValidationError) as e:
        logger.error(f'Error in payload validation: {repr(e)}')
        return {'status': 'error', 'msg': repr(e)}, http.HTTPStatus.BAD_REQUEST
    except Exception as e:
        logger.error(f'Unexpected error: {repr(e)}')
        return {'status': 'error', 'msg': repr(e)}, http.HTTPStatus.INTERNAL_SERVER_ERROR
def create_series():
    series_schema = SeriesSchema()
    if request.method == "POST":
        form_data = request.form
        data_validation(form_data, ['name', 'date_start', 'date_end'])
        series_obj, error = series_schema.load(
            dict(name=form_data.get("name"),
                 date_start=form_data.get("date_start"),
                 date_end=form_data.get("date_end")))
        if error:
            raise HTTPError(400, error)
        try:
            session.add(series_obj)
            session.commit()
        except Exception as exc:
            session.rollback()
            logger.error("Failed to process request: {}".format(str(exc)))
            raise HTTPError(500, {"error": str(exc)})
        flash("Series successfully created")
        return redirect(url_for("create_series"))
    return render_template('create_series.html')
def get_shared_users(ws_id):
    """
    Get the list of users that have read, write, or admin access to a
    workspace object.

    Args:
        ws_id - workspace id of the requested workspace object
    """
    try:
        obj_perm = config()['ws_client'].admin_req(
            "getPermissionsMass", {'workspaces': [{'id': ws_id}]})['perms'][0]
    except WorkspaceResponseError as err:
        logger.error(f"Workspace response error: {err.resp_data}")
        raise err
    shared_users = []
    for username, user_perms in obj_perm.items():
        if user_perms in ['a', 'r', 'w'] and username != '*':
            shared_users.append(username)
    return shared_users
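# Usage sketch (hypothetical workspace id). Returns the usernames holding
# admin ('a'), write ('w'), or read ('r') permission, excluding the public
# wildcard user '*':
#   shared = get_shared_users(12345)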
def get_wangqian(self):
    html = download(self.url, charset="utf-8")
    soup = BeautifulSoup(html, "html.parser")
    total_div = soup.find("span", {"id": "ess_ctr5112_FDCJY_SignOnlineStatistics_totalCount4"})
    zhuzai_div = soup.find("span", {"id": "ess_ctr5112_FDCJY_SignOnlineStatistics_residenceCount4"})
    date_div = soup.find("span", {"id": "ess_ctr5115_FDCJY_HouseTransactionStatist_timeMark4"})
    if total_div and zhuzai_div and date_div:
        total = total_div.text
        zhuzai = zhuzai_div.text
        date = date_div.text.strip()
        tmp = date.split("-")
        if len(tmp) == 3:
            date = datetime.today().replace(year=int(tmp[0]),
                                            month=int(tmp[1]),
                                            day=int(tmp[2]))
        else:
            logger.error("beijing gov get_wangqian: unexpected date format: %s", date)
        row = {"city": "北京", "district": "bj", "zhuzai": zhuzai,
               "total": total, "date": date}
        has = self.dao.has_item("bj", date)
        if not has[0]:
            logger.debug(row)
            self.dao.insert_item(row)
def register():
    logger.debug("register request: {}".format(request.method))
    if request.method == "POST":
        form_data = request.form
        data_validation(form_data, ["username", "password", "email"])
        user_schema = UsersSchema()
        user_obj, error = user_schema.load({
            "username": form_data.get('username'),
            "password": form_data.get('password'),
            "email": form_data.get('email')
        })
        if error:
            logger.error("Failed to process request.",
                         extra={"username": form_data.get('username')})
            raise HTTPError(400, error)
        try:
            session.add(user_obj)
            session.commit()
            return redirect(url_for("login"))
        except Exception as exc:
            session.rollback()
            logger.error("Failed to process request: {}".format(str(exc)))
            raise HTTPError(500, {"error": str(exc)})
    else:
        return render_template("register.html")
def execute_operation(self, function, arguments):
    logger.info(
        f'Delivery tag: {self.delivery_tag}, Executing {function} over arguments {arguments}')
    try:
        if function == 'sum':
            return sum(arguments), 'Executed with success'
        elif function == 'subtract':
            return reduce(lambda x, y: x - y, arguments), 'Executed with success'
        elif function == 'multiply':
            return reduce(lambda x, y: x * y, arguments), 'Executed with success'
        elif function == 'divide':
            return reduce(lambda x, y: x / y, arguments), 'Executed with success'
        return None, f'Unknown operation: {function}'
    except Exception as e:
        logger.error(f'Delivery tag: {self.delivery_tag}, Error: {repr(e)}')
        return None, f'Operation failed, error {repr(e)}'
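# Illustration of the reduce-based dispatch above (hypothetical values):
# 'subtract' folds left-to-right, so [10, 3, 2] yields (10 - 3) - 2 == 5,
# and 'divide' over [8, 2, 2] yields (8 / 2) / 2 == 2.0. A ZeroDivisionError
# from 'divide' is caught and reported as a failed operation.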
def add_account(host_address, username, host_alias, identity_file_path):
    has_config = get_config()
    if not has_config:
        logger.error("No config found. A new config will be created.")
    config_obj = read_config_file()
    is_duplicate = False
    # Each config entry looks like:
    # {
    #     "Host": "sample-test-server",
    #     "HostName": "13.100.212.12",
    #     "User": "dummy",
    #     "IdentityFile": "~/.ssh/id_rsa"
    # },
    for entry in config_obj:
        if 'Host' in entry and host_alias == entry['Host']:
            is_duplicate = True
        if 'HostName' in entry and host_address == entry['HostName']:
            is_duplicate = True
    if is_duplicate:
        logger.error("Duplicate entry. Please try again:")
        init()
    else:
        new_conf = {
            "Host": host_alias,
            "User": username,
            "HostName": host_address
        }
        if identity_file_path:
            new_conf["IdentityFile"] = identity_file_path
        config_obj.append(new_conf)
        write_config_to_file(config_obj)
        logger.info(
            "Finished adding server to config. You can now connect to the recently added server.")
def _callback(self, channel, method_frame, header_frame, data):
    # TODO: Ensure no side effects for consumers
    logger.info(f'Delivery tag: {method_frame.delivery_tag}')
    operation = None
    try:
        operation = json.loads(data.decode('utf-8'))
        calc_op = CalculatorResult(delivery_tag=method_frame.delivery_tag,
                                   function=operation['function'],
                                   arguments=operation['arguments'])
        postgsql.insert(calc_op)
    except Exception as e:
        # If an unexpected error occurs, send the message to a dead-letter
        # queue for later analysis
        logger.error(f'Delivery tag: {method_frame.delivery_tag}, Error: {repr(e)}')
        rbmq.insert_deadqueue(operation)
    finally:
        logger.info(f"Delivery tag: {method_frame.delivery_tag}, Finished ack")
        self.ch.basic_ack(delivery_tag=method_frame.delivery_tag)
def connect_to_server():
    has_config = get_config()
    if not has_config:
        logger.error("No Config Found.")
    else:
        config_obj = read_config_file()
        if len(config_obj) == 0:
            logger.info("No config found.")
        else:
            available_servers = []
            for conf in config_obj:
                if 'Host' in conf:
                    available_servers.append(conf['Host'])
            host_ques = [{
                'type': 'list',
                'name': 'ssh-hosts',
                'message': 'Select server to connect to:',
                'choices': available_servers
            }]
            selected_host = prompt(host_ques, style=style)
            cmd = ['ssh', selected_host['ssh-hosts']]
            subprocess.call(cmd)
def start_loop(consumer: Consumer,
               message_handler: Callable[[Message], None],
               on_success: Callable[[Message], None] = lambda msg: None,
               on_failure: Callable[[Message, Exception], None] = lambda msg, e: None,
               on_config_update: Callable[[], None] = lambda: None,
               return_on_empty: bool = False,
               timeout: float = 0.5):
    """
    Run the indexer event loop.

    Args:
        consumer: a Kafka consumer which will be polled for messages.
        message_handler: a processor for messages from Kafka.
        on_success: called after message_handler has returned successfully and
            the message offset has been committed to Kafka. A noop by default.
        on_failure: called if message_handler, the Kafka commit, or on_success
            throws an exception. A noop by default.
        on_config_update: called when the configuration has been updated.
        return_on_empty: stop the loop when we receive an empty message.
            Helps with testing.
        timeout: how long to wait when polling for the next message.
    """
    # Used for re-fetching the configuration with a throttle
    last_updated_minute = int(time.time() / 60)
    # Failure count for the current offset
    fail_count = 0
    while True:
        msg = consumer.poll(timeout=timeout)
        if msg is None:
            if return_on_empty:
                return
            continue
        curr_min = int(time.time() / 60)
        if curr_min > last_updated_minute:
            # Reload the configuration and notify the caller
            config(force_reload=True)
            on_config_update()
            last_updated_minute = curr_min
        if msg.error():
            if msg.error().code() == KafkaError._PARTITION_EOF:
                logger.info('End of stream.')
            else:
                logger.error(f"Kafka message error: {msg.error()}")
            continue
        val = msg.value().decode('utf-8')
        try:
            val_json = json.loads(val)
        except ValueError as err:
            logger.error(f'JSON parsing error: {err}')
            logger.error(f'Message content: {val}')
            consumer.commit(msg)
            continue
        logger.info(f'Received event: {val_json}')
        start = time.time()
        try:
            message_handler(val_json)
        except Exception as err:
            logger.error(f'Error processing message: {err.__class__.__name__} {err}')
            logger.error(traceback.format_exc())
            # Save this error and message to a topic in Elasticsearch
            on_failure(val_json, err)
            fail_count += 1
            logger.info(f"We've had {fail_count} failures so far")
            if fail_count >= config()['max_handler_failures']:
                logger.info(f"Reached max failure count of {fail_count}. Moving on.")
                consumer.commit(msg)
                fail_count = 0
            continue
        # Move the offset for our partition
        consumer.commit(msg)
        on_success(val_json)
        fail_count = 0
        logger.info(f"Handled {val_json['evtype']} message in {time.time() - start}s")
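# A minimal sketch of wiring up start_loop, assuming a confluent-kafka
# Consumer. The group id and topic name are hypothetical, and auto-commit
# is disabled because the loop commits offsets itself.
def _example_run():
    consumer = Consumer({
        'bootstrap.servers': config()['kafka_server'],
        'group.id': 'example_indexer',
        'enable.auto.commit': False,
    })
    consumer.subscribe(['example_admin_events'])
    try:
        start_loop(consumer, _handle_msg)
    finally:
        consumer.close()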
def _delivery_report(err, msg):
    if err is not None:
        logger.error(f'Message delivery failed:\n{err}')
    else:
        logger.info(f'Message delivered to {msg.topic()}')
def _handle_msg(msg):
    event_type = msg.get('evtype')
    if not event_type:
        err_msg = f"Missing 'evtype' in event: {msg}"
        logger.error(err_msg)
        raise RuntimeError(err_msg)
    objtype = get_obj_type(msg)
    if objtype is not None and isinstance(objtype, str) and len(objtype) > 0:
        # Check the type against the configured whitelist or blacklist, if present
        whitelist = config()['allow_types']
        blacklist = config()['skip_types']
        if whitelist is not None and objtype not in whitelist:
            logger.warning(f"Type {objtype} is not in ALLOW_TYPES, skipping")
            return
        if blacklist is not None and objtype in blacklist:
            logger.warning(f"Type {objtype} is in SKIP_TYPES, skipping")
            return
    if event_type in ['REINDEX', 'NEW_VERSION', 'COPY_OBJECT', 'RENAME_OBJECT']:
        # Index a single workspace object
        obj = _fetch_obj_data(msg)
        ws_info = _fetch_ws_info(msg)
        _reindex_narrative(obj, ws_info)
        if not config()['skip_releng']:
            releng_importer.run_importer(obj, ws_info, msg)
        es_indexer.run_indexer(obj, ws_info, msg)
    elif event_type in ['REINDEX_WS', 'CLONE_WORKSPACE']:
        # Reindex all objects in a workspace, overwriting existing data
        for objinfo in config()['ws_client'].generate_obj_infos(msg['wsid'], admin=True):
            objid = objinfo[0]
            kafka.produce({'evtype': 'REINDEX', 'wsid': msg['wsid'], 'objid': objid},
                          callback=_delivery_report)
    elif event_type == 'INDEX_NONEXISTENT_WS':
        # Reindex all objects in a workspace without overwriting any existing data
        for objinfo in config()['ws_client'].generate_obj_infos(msg['wsid'], admin=True):
            objid = objinfo[0]
            kafka.produce({'evtype': 'INDEX_NONEXISTENT', 'wsid': msg['wsid'], 'objid': objid},
                          callback=_delivery_report)
    elif event_type == 'INDEX_NONEXISTENT':
        # Import to RE if we are not skipping RE and the doc does not already exist in the db
        re_required = (not config()['skip_releng']
                       and not re_client.check_doc_existence(msg['wsid'], msg['objid']))
        # Index in Elasticsearch if a doc with this ID does not already exist there
        es_required = not es_utils.check_doc_existence(msg['wsid'], msg['objid'])
        if not re_required and not es_required:
            # Skip any indexing/importing of this object
            return
        # We need to either index or import the object
        obj = _fetch_obj_data(msg)
        ws_info = _fetch_ws_info(msg)
        if re_required:
            releng_importer.run_importer(obj, ws_info, msg)
        if es_required:
            es_indexer.run_indexer(obj, ws_info, msg)
    elif event_type == 'OBJECT_DELETE_STATE_CHANGE':
        # Delete the object in RE and ES. Synchronous for now.
        es_indexer.delete_obj(msg)
        if not config()['skip_releng']:
            releng_importer.delete_obj(msg)
    elif event_type == 'WORKSPACE_DELETE_STATE_CHANGE':
        # Delete everything in RE and ES under this workspace
        es_indexer.delete_ws(msg)
        if not config()['skip_releng']:
            releng_importer.delete_ws(msg)
    elif event_type == 'SET_GLOBAL_PERMISSION':
        # Set the `is_public` permissions for a workspace
        es_indexer.set_perms(msg)
        if not config()['skip_releng']:
            releng_importer.set_perms(msg)
    elif event_type == 'SET_PERMISSION':
        # Share the narrative with users
        es_indexer.set_user_perms(msg)
    elif event_type == 'RELOAD_ELASTIC_ALIASES':
        # Reload aliases on ES from the global config file
        es_indexer.reload_aliases()
    else:
        logger.warning(f"Unrecognized event {event_type}.")