Example #1
0
    def _init_api_connector(self, start_time: datetime):
        """Set up uplink and send initial message.

        Args:
            start_time: monitoring start timestamp
        """
        try:
            configuration = load_api_configuration()
        except RuntimeError as ex:
            msg = 'Could not load API settings: {0}'.format(ex)
            logging.error(msg)
            self._should_close = True
            self._close()
            return
        camera_pk = configuration.camera_pk
        logging.info('Setting up uplink...')
        self._api_connector = ApiConnector(
            camera_pk, configuration, start_time,
        )
Example #2
0
    def __init__(self, conf, specs):
        # TODO: verify logger setup
        # Set up the logger
        self.logger = logging.getLogger("fdp")
        self.logger.setLevel(logging.DEBUG)

        fh = logging.FileHandler('errors.log')
        fh.setLevel(logging.DEBUG)
        self.logger.addHandler(fh)

        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        self.logger.addHandler(ch)

        # initialize variables and validate the configuration
        self.config = conf
        self.specs = specs
        self.logger.debug("Validating the FDP configuration.")
        # TODO self.validate_configuration(conf, specs)

        self.logger.debug("Initializing the input API.")
        self.radar_api = ApiConnector(host=conf['api_host'],
                                      user=conf['api_user'],
                                      password=conf['api_password'])
Example #3
0
class FairMetadata(object):
    def __init__(self, conf, specs):
        # TODO: verify logger setup
        # Set up the logger
        self.logger = logging.getLogger("fdp")
        self.logger.setLevel(logging.DEBUG)

        fh = logging.FileHandler('errors.log')
        fh.setLevel(logging.DEBUG)
        self.logger.addHandler(fh)

        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        self.logger.addHandler(ch)

        # initialize variables and validate the configuration
        self.config = conf
        self.specs = specs
        self.logger.debug("Validating the FDP configuration.")
        # TODO self.validate_configuration(conf, specs)

        self.logger.debug("Initializing the input API.")
        self.radar_api = ApiConnector(host=conf['api_host'],
                                      user=conf['api_user'],
                                      password=conf['api_password'])

    @staticmethod
    def validate_configuration(conf, specs):
        """
        Validate the values in the provided configuration to make sure that they conform to the specifications.
        Raises an error when the specifications are violated.

        :param conf: The configuration
        :param specs: The specifications
        :return:
        """
        # make sure that the required fields are present in the configuration
        raise NotImplementedError()

    def get_typed_variable(self, d):
        """
        Convert the input var_value into the given var_type

        :param dictionary containing the following:
            var_type: string with value Literal, URI, API_Literal or API_URI
            var_value: string or number
            var_name (optional): in the case that var_type = API_*, this specifies the variable name corresponding to
                the appropriate variable in the response.
        :return: A Literal or URIRef of the input var_value
        """
        # TODO: validate types of each variable (Literal, URI, Date, FOAF:Agent)
        # initialize variables
        var_type = d['type']
        var_name = d.get('varname')
        var_value = d['value']

        if var_type == 'Literal':
            return Literal(var_value)
        elif var_type == 'URI':
            return URIRef(var_value)
        elif var_type in ('API_Literal', 'API_URI'):
            (_, t) = var_type.split('_')
            reply = self.radar_api.get(url=var_value)
            # TODO: add support for nested variables
            try:
                if t == 'Literal':
                    return Literal(reply[var_name])
                else:
                    return URIRef(reply[var_name])
            except (KeyError, TypeError):
                raise KeyError(
                    "API response does not contain variable '{0}'".format(var_name))
        else:
            raise TypeError(
                "Configuration variable must be of type Literal, URI, API_Literal or API_URI"
            )
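
    # Usage sketch (illustrative; the instance name and example values are made
    # up). The dict keys match what get_typed_variable reads above: 'type',
    # 'value' and the optional 'varname'.
    #   fm.get_typed_variable({'type': 'Literal', 'value': 'My repository'})
    #   fm.get_typed_variable({'type': 'URI', 'value': 'http://example.org/fdp'})
    #   fm.get_typed_variable({'type': 'API_URI',
    #                          'value': 'http://example.org/api/info',
    #                          'varname': 'homepage'})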

    @staticmethod
    def replace_placeholders(placeholder, value, d):
        """
        Recursively creates a copy of the input dictionary (d) and replaces all occurrences of the placeholder
        with value in every string it contains.

        :param placeholder: a string to replace
        :param value: the replacement string
        :param d: the input dictionary
        :return: a copy of d with all occurrences of placeholder replaced by value
        """
        d = copy.deepcopy(d)
        for key in d:
            if isinstance(d[key], dict):
                d[key] = FairMetadata.replace_placeholders(
                    placeholder, value, d[key])
            elif isinstance(d[key], list):
                d[key] = [
                    li.replace(placeholder, value) if isinstance(li, str) else li
                    for li in d[key]
                ]
            else:
                d[key] = d[key].replace(placeholder, value) if isinstance(
                    d[key], str) else d[key]
        return d
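
    # Example (illustrative, made-up values): replacing the "{uid}" placeholder
    # inside a nested configuration dict.
    #   FairMetadata.replace_placeholders(
    #       '{uid}', 'catalog-1',
    #       {'identifier': {'type': 'Literal', 'value': '{uid}'}})
    #   -> {'identifier': {'type': 'Literal', 'value': 'catalog-1'}}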

    def rdf_from_specs(self, config, specs, namespaces, var_dict=None):
        """
        Construct an RDF given a configuration and the relevant FDP specifications.

        :param config: Configuration for a given part of the FDP (e.g. root FDP, catalog, dataset, distribution, data
            content)
        :param specs: FDP specifications corresponding to config
        :param namespaces: A list of namespaces included in this RDF
        :param var_dict (optional): mapping of placeholder strings to the values that replace them in config
        :return: RDF with metadata
        """
        # replace all occurrences of the variable placeholders
        if var_dict is not None:
            for placeholder in var_dict:
                config = self.replace_placeholders(placeholder,
                                                   var_dict[placeholder],
                                                   config)

        # TODO: why should this be blank?
        repository = URIRef('')

        # get empty rdf graph with namespaces
        metadata = Graph()

        # add repository identifier
        id_varname = specs["identifier"][
            'variable_name'] if 'variable_name' in specs[
                "identifier"] else "identifier"
        repository_id = self.get_typed_variable(config[id_varname])
        repository_id_uri = URIRef('/' + repository_id)
        metadata.add(
            (repository_id_uri,
             URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
             URIRef('http://purl.org/spar/datacite/ResourceIdentifier')))
        metadata.add((repository_id_uri, URIRef(specs["identifier"]['uri']),
                      Literal(repository_id)))
        metadata.add((repository_id, URIRef(specs["identifier"]['uri']),
                      repository_id_uri))

        # load according to the provided specification
        for spec_id in specs:
            if spec_id != id_varname:
                try:
                    varname = specs[spec_id].get('variable_name', spec_id)
                    obj = self.get_typed_variable(config[varname])
                    # TODO: should type be enforced in the specs?
                    # obj = URIRef(config[varname]) if ('is_uri' in specs[spec_id] and specs[spec_id]['is_uri']) else Literal(config[varname])
                    self.logger.debug(
                        (repository, specs[spec_id]['uri'], obj, varname))
                    metadata.add(
                        (repository, URIRef(specs[spec_id]['uri']), obj))
                except KeyError:
                    if specs[spec_id]['required']:
                        self.logger.exception(
                            "A required data field was not provided: " +
                            spec_id)
                    else:
                        self.logger.info(
                            "An optional data field was not provided: " +
                            spec_id)

        for name in namespaces:
            metadata.bind(name, namespaces[name])

        return metadata

    def get_rdf(self, config, specs, uid, placeholder):
        """Build the RDF for config, or for config[uid] when uid is given; falls
        back to the placeholder entry with uid substituted when uid is not a key."""
        if uid is None:
            return self.rdf_from_specs(config, specs, self.specs['namespaces'])
        elif uid not in config:
            return self.rdf_from_specs(config[placeholder], specs,
                                       self.specs['namespaces'],
                                       {placeholder: uid})
        else:
            return self.rdf_from_specs(config[uid], specs,
                                       self.specs['namespaces'])

    def fdp(self):
        return self.get_rdf(self.config, self.specs['fdp'], None, None)

    def catalogs(self):
        """
        Iterates over all catalogs and returns a concatenation of their metadata.
        :return: a Graph combining the metadata of all catalogs
        """
        metadata = Graph()
        for uid in self.config['catalogs']:
            # TODO: deal with catalogs from the API
            if uid != "{catalog}":
                metadata = metadata + self.get_rdf(self.config['catalogs'],
                                                   self.specs['catalog'], uid,
                                                   "{uid}")
        return metadata

    def catalog(self, uid):
        return self.get_rdf(self.config['catalogs'], self.specs['catalog'],
                            uid, "{uid}")

    def dataset(self, uid):
        return self.get_rdf(self.config['catalogs'][uid]['dataset'],
                            self.specs['dataset'], uid, "{uid}")

    def distribution(self, uid):
        return self.get_rdf(self.config['catalogs'][uid]['distribution'],
                            self.specs['distribution'], uid, "{uid}")

    def record(self, study_id):
        """Generates the metadata related to a data record."""
        raise NotImplementedError()
Example #4
0
class Main(object):
    """Handle application lifecycle."""

    def __init__(self):
        """Initialize signal handlers, API client and run."""
        self._should_close = False
        self._frame_ingestor = FrameIngestor()
        self._batcher = DataBatcher()

        # Add signal handlers
        signal.signal(signal.SIGINT, self._handle_signals)
        signal.signal(signal.SIGTERM, self._handle_signals)

        # Extra configuration
        self._uses_file = False
        self._display_frame = read_int_from_env('DISPLAY_FRAME', 0) > 0
        self._max_frame_width = read_int_from_env(
            'MAX_FRAME_WIDTH', DEFAULT_FRAME_WIDTH,
        )

        # Start client
        self._start()

    def _init_counter(self):
        """Set up people counter."""
        try:
            configuration = load_counter_configuration()
        except RuntimeError as ex:
            msg = 'Could not load counter settings: {0}'.format(ex)
            logging.error(msg)
            self._should_close = True
            self._close()
            return
        logging.info('Setting up counter...')
        self._counter = PeopleCounter(configuration)

    def _init_archiver(self, start_time: datetime):
        """Set up archiver and save initial message.

        Args:
            start_time: monitoring start timestamp
        """
        archives_dir = os.getenv('ARCHIVES_DIR', None)
        if not archives_dir:
            logging.error('Missing ARCHIVES_DIR')
            self._should_close = True
            return

        start_time_string = start_time.strftime('%Y-%m-%d_%H_%M_%S')
        archive_path = '{0}/{1}.csv'.format(archives_dir, start_time_string)

        logging.info('Opening archive...')

        try:
            self._archiver = CsvArchiver(archive_path)
        except RuntimeError as ex1:
            msg = 'Could not setup archive: {0}'.format(ex1)
            logging.error(msg)
            self._archiver = None
            self._should_close = True
            return

        try:
            self._archiver.init()
        except RuntimeError as ex2:
            msg = 'Could not init archive: {0}'.format(ex2)
            logging.error(msg)
            self._archiver = None
            self._should_close = True
            return

        # Save initial event
        self._archiver.append_event(
            self._last_batch_time, EventType.monitoring_started,
        )

    def _init_api_connector(self, start_time: datetime):
        """Set up uplink and send initial message.

        Args:
            start_time: monitoring start timestamp
        """
        try:
            configuration = load_api_configuration()
        except RuntimeError as ex:
            msg = 'Could not load API settings: {0}'.format(ex)
            logging.error(msg)
            self._should_close = True
            self._close()
            return
        camera_pk = configuration.camera_pk
        logging.info('Setting up uplink...')
        self._api_connector = ApiConnector(
            camera_pk, configuration, start_time,
        )

    def _start(self):
        """Start the system."""
        self._start_monitoring()

        while not self._should_close:
            # Execute loop
            self._execute_loop()

        self._close()

    def _start_monitoring(self):
        """Start monitoring coroutines."""
        # Save start time
        start_time = datetime.now().astimezone()
        self._last_batch_time = start_time
        logging.info('System starts')

        # Start monitoring
        self._init_counter()
        self._init_archiver(start_time)
        self._init_api_connector(start_time)
        self._init_ingestion_stream()

    def _execute_loop(self):
        """Execute main program loop."""
        # Get camera frame
        frame = self._frame_ingestor.get_frame()
        if self._uses_file and frame is None:
            self._should_close = True
            return

        frame = resize(frame, width=self._max_frame_width)

        if self._display_frame:
            cv2.imshow('Frame', frame)
            cv2.waitKey(1)

        # Update counter
        current_time = datetime.now().astimezone()
        self._counter.update(frame, current_time)
        self._batcher.entered(self._counter.get_entering_list())
        self._batcher.left(self._counter.get_leaving_list())

        # Run batching
        delta = current_time - self._last_batch_time
        if delta.total_seconds() > BATCH_SECONDS:
            self._last_batch_time = current_time

            # Log data
            batch = self._batcher.batch()
            logging.debug('People in: {0}'.format(len(batch.entering)))
            logging.debug('People out: {0}'.format(len(batch.leaving)))

            # Add to archiver
            self._archiver.append(batch)
            self._archiver.flush()

            # Send to API endpoint
            if not self._api_connector.send(batch):
                logging.warning('Could not upload events')

    def _close(self):
        """Close the system."""
        # Save shutdown time
        shutdown_time = datetime.now().astimezone()
        logging.info('System shutting down...')

        # Finish ingestion stream
        if self._display_frame:
            cv2.destroyAllWindows()
        if self._frame_ingestor.has_source():
            logging.info('Closing ingestor source...')
            self._frame_ingestor.release_source()

        # Finish archiver
        if self._archiver:
            logging.info('Closing archive...')
            self._archiver.append_event(
                shutdown_time, EventType.monitoring_ended,
            )
            self._archiver.finalize()

        # Finish monitoring
        if self._api_connector:
            logging.info('Closing uplink...')
            self._api_connector.close(shutdown_time)

    def _init_ingestion_stream(self):
        """Init ingestion stream."""
        logging.info('Opening camera stream...')

        stream_type = read_int_from_env('FRAME_SOURCE')
        if stream_type is None:
            self._should_close = True
            return

        if stream_type == 0:  # File
            self._uses_file = True
            path = os.getenv('SOURCE_FILE_PATH', None)
            source = FileIngestorSource(path)
        elif stream_type == 1:  # Webcam
            stream_num = read_int_from_env('WEBCAM_STREAM_NUM')
            if stream_num is None:
                self._should_close = True
                return
            source = WebcamIngestorSource(stream_num)
        else:
            logging.error('Unknown FRAME_SOURCE: {0}'.format(stream_type))
            self._should_close = True
            return

        try:
            self._frame_ingestor.register_source(source)
        except RuntimeError as ex:
            msg = 'Could not register_source: {0}'.format(ex)
            logging.error(msg)
            self._should_close = True

    def _handle_signals(self, _signum, _frame):
        """Handle interruption events.

        Args:
            _signum: Signal number
            _frame: Current stack frame
        """
        self._should_close = True
Example #5
0
curr_titles = TitleSaver.read_title_file(startfile)
curr_titles -= all_discards
curr_titles -= all_redirects
all_pending = utils.limit_titles(all_titles, curr_titles, tot_num_titles)
if len(curr_titles) == 0:
    sys.exit("No new titles to crawl. Quitting...")


# Process a batch, use links from the batch in a future batch, ...
all_content = []
while (curr_level <= args['levels'] and len(curr_titles) > 0
       and len(all_content) < args['maxpages']):
    if not args['seed']:
        print("Level {} >>>".format(curr_level))
    print("Processing batch of {} {} titles...".format(len(curr_titles), context))

    areader = ArticleReader(transcludes=cfg['transcludes'], restricted=args['restricted'])
    bproc = BatchProcessor(ApiConnector(**cfg['api']).func, 1, areader, seed=args['seed'])
    articles = bproc.batch_call_api(curr_titles)

    print("Reading {} articles...".format(len(articles)))
    contents, next_titles, trans_titles = bproc.read_articles(articles)

    # Don't add duplicates
    uniq_contents = []
    for content in contents:
        currid = str(content['pageid'])
        if currid not in all_ids:
            uniq_contents.append(content)
            all_ids.add(currid)

    if cfg['transcludes']['add_to_curr_level']:
        # Adding to current level is aggressive
Example #6
0
    def __init__(self):
        logger.info('Init App Class')
        accounts_data = Api.get_accounts()
        self.account_data = accounts_data.json()['items'][0]
        self.account_id = self.account_data['id']
        self.checker = json.loads(checker.read())
Example #7
0
    def send_resume_file(self, data_excel: dict):
        """
        Отправляет данные и файл резюме на сервер, а так же прикрепляет кандидаата к вакансии исходя из данных в базе.

        :param data_excel: словарь данных полученный с помощью функции conversion_db()
        """

        count_person = len(data_excel)

        statuses_list = Api.get_statuses_list(self.account_id)['items']
        vacancy_list = Api.get_vacancies_list(self.account_id)['items']

        for person in data_excel[self.checker['processed']:]:

            names = person['ФИО'].split(' ')
            last_name = names[0] if len(names) > 0 else None
            first_name = names[1] if len(names) > 1 else None
            middle_name = names[2] if len(names) > 2 else None

            file_path = './Тестовое задание/' + person['Должность'] + '/'

            logger.debug('Looking for the resume file')

            for element in os.scandir(file_path):
                if element.is_file():
                    if last_name in element.name:
                        logger.debug('File exists')

                        resume_data = Api.upload_resume_file(
                            self.account_id,
                            filename=element.name,
                            file_path=element.path)

                        applicants_data = Api.add_applicants(
                            last_name=resume_data['fields']['name']['last'],
                            middle_name=resume_data['fields']['name']
                            ['middle'],
                            first_name=resume_data['fields']['name']['first'],
                            phone=resume_data['fields']['phones'][0],
                            email=resume_data['fields']['email'],
                            position=resume_data['fields']['position'],
                            money=resume_data['fields']['salary'],
                            birth_data=resume_data['fields']['birthdate'],
                            photo_id=resume_data['photo']['id'],
                            resume_file_id=resume_data['id'],
                            account_id=self.account_id,
                            resume_text=resume_data['text'])

                        if person['Статус'].lower() in status_dict:
                            status_name = status_dict[person['Статус'].lower()]
                            for status in statuses_list:
                                if status['name'] == status_name:
                                    status_id = status['id']

                        for vacancy in vacancy_list:
                            if person['Должность'] == vacancy['position']:
                                vacancy_id = vacancy['id']

                        Api.add_applicants_to_vacancy(
                            account_id=self.account_id,
                            applicant_id=applicants_data['id'],
                            vacancy_id=vacancy_id,
                            status_id=status_id,
                            comment=person['Комментарий'],
                            resume_file_id=resume_data['id'])

                        self.checker['processed'] += 1
                        logger.info(
                            f"Processed {self.checker['processed']} of {count_person} resumes"
                        )
                        checker.seek(0)
                        json.dump(self.checker, checker, ensure_ascii=False)