import copy
import logging

from rdflib import Graph, Literal, URIRef

# ApiConnector (the project's API client) is assumed to be provided by the
# surrounding package.


class FairMetadata(object):
    def __init__(self, conf, specs):
        # TODO: verify logger setup
        # set up the logger
        self.logger = logging.getLogger("fdp")
        self.logger.setLevel(logging.DEBUG)
        fh = logging.FileHandler('errors.log')
        fh.setLevel(logging.DEBUG)
        self.logger.addHandler(fh)
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        self.logger.addHandler(ch)

        # initialize variables and validate the configuration
        self.config = conf
        self.specs = specs
        self.logger.debug("Validating the FDP configuration.")
        # TODO
        self.validate_configuration(conf, specs)

        self.logger.debug("Initializing the input API.")
        self.radar_api = ApiConnector(host=conf['api_host'],
                                      user=conf['api_user'],
                                      password=conf['api_password'])

    @staticmethod
    def validate_configuration(conf, specs):
        """
        Validate the values in the provided configuration to make sure that they
        conform to the specifications. Raises an error when a specification is
        violated.

        :param conf: The configuration
        :param specs: The specifications
        :return:
        """
        # make sure that the required fields are present in the configuration
        raise NotImplementedError()

    def get_typed_variable(self, d):
        """
        Convert the input value into the given type.

        :param d: dictionary containing the following:
            type: string with value Literal, URI, API_Literal or API_URI
            value: string or number
            varname (optional): when type is API_*, the name of the corresponding
                variable in the API response.
        :return: A Literal or URIRef of the input value
        """
        # TODO: validate types of each variable (Literal, URI, Date, FOAF:Agent)
        # initialize variables
        var_type = d['type']
        var_name = d['varname'] if 'varname' in d else None
        var_value = d['value']

        if var_type == 'Literal':
            return Literal(var_value)
        elif var_type == 'URI':
            return URIRef(var_value)
        elif var_type == 'API_Literal' or var_type == 'API_URI':
            (_, t) = var_type.split('_')
            reply = self.radar_api.get(url=var_value)
            # TODO: add support for nested variables
            try:
                if t == 'Literal':
                    return Literal(reply[var_name])
                else:
                    return URIRef(reply[var_name])
            except KeyError:
                raise KeyError(
                    "API response does not contain variable '{0}'".format(var_name))
        else:
            raise TypeError(
                "Configuration variable must be of type Literal, URI, API_Literal or API_URI"
            )

    @staticmethod
    def replace_placeholders(placeholder, value, d):
        """
        Recursively creates a copy of the input dictionary (d) and replaces all
        instances of the placeholder with value for all strings.

        :param placeholder: a string to replace
        :param value: the replacement string
        :param d: the input dictionary
        :return: a copy of d with all occurrences of placeholder replaced by value
        """
        d = copy.deepcopy(d)
        for key in d:
            if isinstance(d[key], dict):
                d[key] = FairMetadata.replace_placeholders(placeholder, value, d[key])
            elif isinstance(d[key], list):
                d[key] = [li.replace(placeholder, value) for li in d[key]]
            else:
                d[key] = d[key].replace(placeholder, value) \
                    if isinstance(d[key], str) else d[key]
        return d

    def rdf_from_specs(self, config, specs, namespaces, var_dict=None):
        """
        Construct an RDF graph given a configuration and the relevant FDP
        specifications.

        :param config: Configuration for a given part of the FDP (e.g. root FDP,
            catalog, dataset, distribution, data content)
        :param specs: FDP specifications corresponding to config
        :param namespaces: A mapping of namespaces included in this RDF
        :param var_dict: (optional) mapping of placeholder strings to their
            replacement values
        :return: RDF graph with metadata
        """
        # replace all occurrences of the variable placeholders
        if var_dict is not None:
            for placeholder in var_dict:
                config = self.replace_placeholders(placeholder,
                                                   var_dict[placeholder], config)

        # TODO: why should this be blank?
        repository = URIRef('')

        # get empty rdf graph with namespaces
        metadata = Graph()

        # add repository identifier
        id_varname = specs["identifier"]['variable_name'] \
            if 'variable_name' in specs["identifier"] else "identifier"
        repository_id = self.get_typed_variable(config[id_varname])
        repository_id_uri = URIRef('/' + repository_id)
        metadata.add((repository_id_uri,
                      URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
                      URIRef('http://purl.org/spar/datacite/ResourceIdentifier')))
        metadata.add((repository_id_uri,
                      URIRef(specs["identifier"]['uri']),
                      Literal(repository_id)))
        metadata.add((repository_id,
                      URIRef(specs["identifier"]['uri']),
                      repository_id_uri))

        # load according to provided specification
        for spec_id in specs:
            if spec_id != id_varname:
                try:
                    varname = specs[spec_id]['variable_name'] \
                        if 'variable_name' in specs[spec_id] else spec_id
                    obj = self.get_typed_variable(config[varname])
                    # TODO: should type be enforced in the specs?
                    # obj = URIRef(config[varname]) if ('is_uri' in specs[spec_id] and specs[spec_id]['is_uri']) else Literal(config[varname])
                    self.logger.debug((repository, specs[spec_id]['uri'], obj, varname))
                    metadata.add((repository, URIRef(specs[spec_id]['uri']), obj))
                except KeyError:
                    if specs[spec_id]['required']:
                        self.logger.exception(
                            "A required data field was not provided: " + spec_id)
                    else:
                        self.logger.info(
                            "An optional data field was not provided: " + spec_id)

        for name in namespaces:
            metadata.bind(name, namespaces[name])

        return metadata

    def get_rdf(self, config, specs, uid, placeholder):
        if uid is None:
            return self.rdf_from_specs(config, specs, self.specs['namespaces'])
        elif uid not in config:
            return self.rdf_from_specs(config[placeholder], specs,
                                       self.specs['namespaces'], {placeholder: uid})
        else:
            return self.rdf_from_specs(config[uid], specs, self.specs['namespaces'])

    def fdp(self):
        return self.get_rdf(self.config, self.specs['fdp'], None, None)

    def catalogs(self):
        """
        Iterates over all catalogs and returns a concatenation of their metadata.

        :return:
        """
        metadata = Graph()
        for uid in self.config['catalogs']:
            # TODO: deal with catalogs from the API
            if uid != "{catalog}":
                metadata = metadata + self.get_rdf(self.config['catalogs'],
                                                   self.specs['catalog'], uid,
                                                   "{uid}")
        return metadata

    def catalog(self, uid):
        return self.get_rdf(self.config['catalogs'], self.specs['catalog'], uid,
                            "{uid}")

    def dataset(self, uid):
        return self.get_rdf(self.config['catalogs'][uid]['dataset'],
                            self.specs['dataset'], uid, "{uid}")

    def distribution(self, uid):
        return self.get_rdf(self.config['catalogs'][uid]['distribution'],
                            self.specs['distribution'], uid, "{uid}")

    def record(self, study_id):
        """Generates the metadata related to a data record."""
        raise NotImplementedError()
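# Illustrative sketch only (not part of the original module): replace_placeholders is
# a staticmethod, so it can be exercised without instantiating FairMetadata (whose
# __init__ still calls the unimplemented validate_configuration). The dictionary
# below is a made-up example of the nested configuration structures the class walks.
if __name__ == '__main__':
    example_config = {
        'title': {'type': 'Literal', 'value': 'Catalog {uid}'},
        'keywords': ['{uid}', 'fair'],
    }
    filled = FairMetadata.replace_placeholders('{uid}', 'catalog-1', example_config)
    # filled == {'title': {'type': 'Literal', 'value': 'Catalog catalog-1'},
    #            'keywords': ['catalog-1', 'fair']}
    print(filled)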
class Main(object):
    """Handle application lifecycle."""

    def __init__(self):
        """Initialize signal handlers, API client and run."""
        self._should_close = False
        self._frame_ingestor = FrameIngestor()
        self._batcher = DataBatcher()

        # Components created later; default to None so _close() stays safe if any
        # of the _init_* steps fails early.
        self._counter = None
        self._archiver = None
        self._api_connector = None

        # Add signal handlers
        signal.signal(signal.SIGINT, self._handle_signals)
        signal.signal(signal.SIGTERM, self._handle_signals)

        # Extra configuration
        self._uses_file = False
        self._display_frame = read_int_from_env('DISPLAY_FRAME', 0) > 0
        self._max_frame_width = read_int_from_env(
            'MAX_FRAME_WIDTH',
            DEFAULT_FRAME_WIDTH,
        )

        # Start client
        self._start()

    def _init_counter(self):
        """Set up people counter."""
        try:
            configuration = load_counter_configuration()
        except RuntimeError as ex:
            msg = 'Could not load counter settings: {0}'.format(ex)
            logging.error(msg)
            self._should_close = True
            self._close()
            return

        logging.info('Setting up counter...')
        self._counter = PeopleCounter(configuration)

    def _init_archiver(self, start_time: datetime):
        """Set up archiver and save initial message.

        Args:
            start_time: monitoring start timestamp
        """
        archives_dir = os.getenv('ARCHIVES_DIR', None)
        if not archives_dir:
            logging.error('Missing ARCHIVES_DIR')
            self._should_close = True
            return

        start_time_string = start_time.strftime('%Y-%m-%d_%H_%M_%S')
        archive_path = '{0}/{1}.csv'.format(archives_dir, start_time_string)

        logging.info('Opening archive...')
        try:
            self._archiver = CsvArchiver(archive_path)
        except RuntimeError as ex1:
            msg = 'Could not setup archive: {0}'.format(ex1)
            logging.error(msg)
            self._archiver = None
            self._should_close = True
            return

        try:
            self._archiver.init()
        except RuntimeError as ex2:
            msg = 'Could not init archive: {0}'.format(ex2)
            logging.error(msg)
            self._archiver = None
            self._should_close = True
            return

        # Save initial event
        self._archiver.append_event(
            self._last_batch_time,
            EventType.monitoring_started,
        )

    def _init_api_connector(self, start_time: datetime):
        """Set up uplink and send initial message.

        Args:
            start_time: monitoring start timestamp
        """
        try:
            configuration = load_api_configuration()
        except RuntimeError as ex:
            msg = 'Could not load API settings: {0}'.format(ex)
            logging.error(msg)
            self._should_close = True
            self._close()
            return

        camera_pk = configuration.camera_pk
        logging.info('Setting up uplink...')
        self._api_connector = ApiConnector(
            camera_pk,
            configuration,
            start_time,
        )

    def _start(self):
        """Start the system."""
        self._start_monitoring()
        while not self._should_close:
            # Execute loop
            self._execute_loop()
        self._close()

    def _start_monitoring(self):
        """Start monitoring coroutines."""
        # Save start time
        start_time = datetime.now().astimezone()
        self._last_batch_time = start_time
        logging.info('System starts')

        # Start monitoring
        self._init_counter()
        self._init_archiver(start_time)
        self._init_api_connector(start_time)
        self._init_ingestion_stream()

    def _execute_loop(self):
        """Execute main program loop."""
        # Get camera frame
        frame = self._frame_ingestor.get_frame()
        if self._uses_file and frame is None:
            self._should_close = True
            return

        frame = resize(frame, width=self._max_frame_width)
        if self._display_frame:
            cv2.imshow('Frame', frame)
            cv2.waitKey(1)

        # Update counter
        current_time = datetime.now().astimezone()
        self._counter.update(frame, current_time)
        self._batcher.entered(self._counter.get_entering_list())
        self._batcher.left(self._counter.get_leaving_list())

        # Run batching
        delta = current_time - self._last_batch_time
        if delta.total_seconds() > BATCH_SECONDS:
            self._last_batch_time = current_time

            # Log data
            batch = self._batcher.batch()
            logging.debug('People in: {0}'.format(len(batch.entering)))
            logging.debug('People out: {0}'.format(len(batch.leaving)))

            # Add to archiver
            self._archiver.append(batch)
            self._archiver.flush()

            # Send to API endpoint
            if not self._api_connector.send(batch):
                logging.warning('Could not upload events')

    def _close(self):
        """Close the system."""
        # Save shutdown time
        shutdown_time = datetime.now().astimezone()
        logging.info('System shutting down...')

        # Finish ingestion stream
        if self._display_frame:
            cv2.destroyAllWindows()
        if self._frame_ingestor.has_source():
            logging.info('Closing ingestor source...')
            self._frame_ingestor.release_source()

        # Finish archiver
        if self._archiver:
            logging.info('Closing archive...')
            self._archiver.append_event(
                shutdown_time,
                EventType.monitoring_ended,
            )
            self._archiver.finalize()

        # Finish monitoring
        if self._api_connector:
            logging.info('Closing uplink...')
            self._api_connector.close(shutdown_time)

    def _init_ingestion_stream(self):
        """Init ingestion stream."""
        logging.info('Opening camera stream...')
        stream_type = read_int_from_env('FRAME_SOURCE')
        if stream_type is None:
            self._should_close = True
            return

        if stream_type == 0:  # File
            self._uses_file = True
            path = os.getenv('SOURCE_FILE_PATH', None)
            source = FileIngestorSource(path)
        elif stream_type == 1:  # Webcam
            stream_num = read_int_from_env('WEBCAM_STREAM_NUM')
            if stream_num is None:
                self._should_close = True
                return
            source = WebcamIngestorSource(stream_num)
        else:
            # Unknown source type: fail explicitly rather than continuing
            # without a source.
            logging.error('Unknown FRAME_SOURCE: {0}'.format(stream_type))
            self._should_close = True
            return

        try:
            self._frame_ingestor.register_source(source)
        except RuntimeError as ex:
            msg = 'Could not register_source: {0}'.format(ex)
            logging.error(msg)
            self._should_close = True

    def _handle_signals(self, _signum, _frame):
        """Handle interruption events.

        Args:
            _signum: Signal number
            _frame: Current stack frame
        """
        self._should_close = True
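# read_int_from_env is used throughout Main but not shown in this excerpt. The helper
# below is a minimal sketch of how it could behave, inferred from its call sites: it
# returns an int parsed from the environment and falls back to an optional default;
# Main treats a None result as a configuration error.
import logging
import os


def read_int_from_env(name, default=None):
    """Read an integer environment variable, returning `default` when unset or invalid."""
    raw = os.getenv(name, None)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        logging.error('Invalid integer value for {0}: {1}'.format(name, raw))
        return default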
curr_titles = TitleSaver.read_title_file(startfile)
curr_titles -= all_discards
curr_titles -= all_redirects
all_pending = utils.limit_titles(all_titles, curr_titles, tot_num_titles)
if len(curr_titles) == 0:
    sys.exit("No new titles to crawl. Quitting...")

# Process a batch, use links from the batch in a future batch, ...
all_content = []
while curr_level <= args['levels'] and len(curr_titles) > 0 and len(all_content) < args['maxpages']:
    if not args['seed']:
        print("Level {} >>>".format(curr_level))
    print("Processing batch of {} {} titles...".format(len(curr_titles), context))

    areader = ArticleReader(transcludes=cfg['transcludes'], restricted=args['restricted'])
    bproc = BatchProcessor(ApiConnector(**cfg['api']).func, 1, areader, seed=args['seed'])
    articles = bproc.batch_call_api(curr_titles)

    print("Reading {} articles...".format(len(articles)))
    contents, next_titles, trans_titles = bproc.read_articles(articles)

    # Don't add duplicates
    uniq_contents = []
    for content in contents:
        currid = str(content['pageid'])
        if currid not in all_ids:
            uniq_contents.append(content)
            all_ids.add(currid)

    if cfg['transcludes']['add_to_curr_level']:
        # Adding to current level is aggressive
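# TitleSaver is not defined in this excerpt. The sketch below is an assumption about
# read_title_file based on how its result is used above (it supports set subtraction):
# it loads a newline-delimited title file into a set.
class TitleSaver(object):
    @staticmethod
    def read_title_file(path):
        """Read one title per line into a set, skipping blank lines."""
        with open(path, encoding='utf-8') as fh:
            return {line.strip() for line in fh if line.strip()}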
def __init__(self):
    logger.info('Init App Class')
    accounts_data = Api.get_accounts()
    self.account_data = accounts_data.json()['items'][0]
    self.account_id = self.account_data['id']
    self.checker = json.loads(checker.read())
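# Neither `logger` nor `checker` is defined in this excerpt. A minimal sketch of what
# they could look like, inferred from how they are used here and in send_resume_file:
# `checker` is a JSON progress file opened for reading and writing (initially holding
# something like {"processed": 0}); the file name 'checker.json' is an assumption.
import json
import logging

logger = logging.getLogger(__name__)
checker = open('checker.json', 'r+', encoding='utf-8')  # e.g. {"processed": 0}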
def send_resume_file(self, data_excel: dict):
    """
    Send the resume data and file to the server, and attach the candidate to a
    vacancy based on the data in the database.

    :param data_excel: data dictionary produced by the conversion_db() function
    """
    count_person = len(data_excel)
    statuses_list = Api.get_statuses_list(self.account_id)['items']
    vacancy_list = Api.get_vacancies_list(self.account_id)['items']

    for person in data_excel[self.checker['processed']:]:
        # Split the full name; missing parts default to None
        names = person['ФИО'].split(' ')
        last_name = names[0] if len(names) > 0 else None
        first_name = names[1] if len(names) > 1 else None
        middle_name = names[2] if len(names) > 2 else None

        file_path = './Тестовое задание/' + person['Должность'] + '/'
        logger.debug('Start looking for resume file')
        for element in os.scandir(file_path):
            if element.is_file() and last_name in element.name:
                logger.debug('File exists')
                resume_data = Api.upload_resume_file(
                    self.account_id,
                    filename=element.name,
                    file_path=element.path)
                applicants_data = Api.add_applicants(
                    last_name=resume_data['fields']['name']['last'],
                    middle_name=resume_data['fields']['name']['middle'],
                    first_name=resume_data['fields']['name']['first'],
                    phone=resume_data['fields']['phones'][0],
                    email=resume_data['fields']['email'],
                    position=resume_data['fields']['position'],
                    money=resume_data['fields']['salary'],
                    birth_data=resume_data['fields']['birthdate'],
                    photo_id=resume_data['photo']['id'],
                    resume_file_id=resume_data['id'],
                    account_id=self.account_id,
                    resume_text=resume_data['text'])

                # Resolve the status and vacancy ids for this candidate
                if person['Статус'].lower() in status_dict:
                    status_name = status_dict[person['Статус'].lower()]
                    for status in statuses_list:
                        if status['name'] == status_name:
                            status_id = status['id']
                for vacancy in vacancy_list:
                    if person['Должность'] == vacancy['position']:
                        vacancy_id = vacancy['id']

                Api.add_applicants_to_vacancy(
                    account_id=self.account_id,
                    applicant_id=applicants_data['id'],
                    vacancy_id=vacancy_id,
                    status_id=status_id,
                    comment=person['Комментарий'],
                    resume_file_id=resume_data['id'])

        self.checker['processed'] += 1
        logger.info(
            f"Processed {self.checker['processed']} of {count_person} resumes")
        checker.seek(0)
        json.dump(self.checker, checker, ensure_ascii=False)
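# Illustrative usage sketch (assumptions, not from the original code): the class name
# App and the conversion_db() helper are inferred from the log message and docstring
# above; both are defined elsewhere in the project.
if __name__ == '__main__':
    data_excel = conversion_db()   # rows prepared from the Excel/database export
    app = App()                    # runs the __init__ shown above
    app.send_resume_file(data_excel)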