def iqr_ingest_file():
    """
    Ingest the file with the given UID, getting the path from the
    uploader.

    ``self`` is not a parameter of this function; it is resolved from the
    enclosing scope.

    On a POST request the uploaded file named by the ``fid`` form field is
    moved into the current IQR session's work directory, wrapped in a
    ``DataFileElement``, recorded as example data for the session, and a
    descriptor is computed for it. That descriptor is recorded in
    ``self._iqr_example_pos_descr`` and handed to the session's
    ``adjudicate`` method.

    :return: string of data/descriptor element's UUID. If descriptor
        computation raises ``ValueError`` an ``(error message, 400)``
        tuple is returned instead. Requests that are not POST fall through
        and implicitly return ``None``.
    :rtype: str | (str, int) | None

    """
    # TODO: Add status dict with a "GET" method branch for getting that
    #       status information.

    # Start the ingest of a FID when POST
    if flask.request.method == "POST":
        with self.get_current_iqr_session() as iqrs:
            fid = flask.request.form['fid']

            self._log.debug("[%s::%s] Getting temporary filepath from "
                            "uploader module", iqrs.uuid, fid)
            upload_filepath = self.mod_upload.get_path_for_id(fid)
            self.mod_upload.clear_completed(fid)

            self._log.debug("[%s::%s] Moving uploaded file",
                            iqrs.uuid, fid)
            sess_upload = osp.join(self._iqr_work_dirs[iqrs.uuid],
                                   osp.basename(upload_filepath))
            os.rename(upload_filepath, sess_upload)

            # Record uploaded data as example data for this session.
            upload_data = DataFileElement(sess_upload)
            uuid = upload_data.uuid()
            self._iqr_example_data[iqrs.uuid][uuid] = upload_data

            # Extend session ingest -- modifying
            self._log.debug("[%s::%s] Adding new data to session "
                            "positives", iqrs.uuid, fid)
            # The data element itself is not added to the session; its
            # descriptor is computed here and adjudicated below.
            try:
                upload_descr = \
                    self._descriptor_generator.compute_descriptor(
                        upload_data, self._descr_elem_factory
                    )
            except ValueError as ex:
                # ``except ... as`` is valid on Python 2.6+ and Python 3,
                # unlike the comma form.
                return "Input Error: %s" % str(ex), 400

            self._iqr_example_pos_descr[iqrs.uuid][uuid] = upload_descr
            iqrs.adjudicate((upload_descr,))

            return str(uuid)
def iqr_ingest_file():
    """
    Move an uploaded file into the current IQR session's work directory
    and add it to that session's positive data.

    ``self`` is not a parameter of this function; it is resolved from the
    enclosing scope. The file is identified by the ``fid`` form field of
    the current request.

    :return: string of data/descriptor element's UUID. Requests that are
        not POST implicitly yield ``None``.
    :rtype: str

    """
    # TODO: Add status dict with a "GET" method branch for getting that
    #       status information.

    # Only a POST starts the ingest of a FID.
    if flask.request.method != "POST":
        return None

    session = self.get_current_iqr_session()
    file_id = flask.request.form['fid']

    self.log.debug(
        "[%s::%s] Getting temporary filepath from uploader module",
        session.uuid, file_id
    )
    tmp_path = self.mod_upload.get_path_for_id(file_id)
    self.mod_upload.clear_completed(file_id)

    self.log.debug("[%s::%s] Moving uploaded file", session.uuid, file_id)
    session_path = osp.join(session.work_dir, osp.basename(tmp_path))
    os.rename(tmp_path, session_path)

    element = DataFileElement(session_path)
    # Result deliberately unused here.
    # NOTE(review): presumably this forces the UUID to be computed before
    # the element is handed to the session -- confirm.
    element.uuid()

    # Extend session ingest -- modifying
    self.log.debug(
        "[%s::%s] Adding new data to session positives",
        session.uuid, file_id
    )
    session.add_positive_data(element)

    return str(element.uuid())
def iqr_ingest_file():
    """
    Ingest the file with the given UID, getting the path from the
    uploader.

    ``self`` is not a parameter of this function; it is resolved from the
    enclosing scope.

    For a POST request, while holding the current IQR session context:

    1. the upload named by the ``fid`` form field is moved into the
       session's work directory,
    2. it is wrapped in a ``DataFileElement`` and stored in
       ``self._iqr_example_data`` under the session and element UUIDs,
    3. a descriptor is computed for it, stored in
       ``self._iqr_example_pos_descr`` and passed to the session's
       ``adjudicate`` method.

    :return: string of data/descriptor element's UUID, or an
        ``(error message, 400)`` tuple when descriptor computation raises
        ``ValueError``. ``None`` for requests that are not POST.
    :rtype: str | (str, int) | None

    """
    # TODO: Add status dict with a "GET" method branch for getting that
    #       status information.

    # Start the ingest of a FID only when POST; anything else falls
    # through with no response value, as before.
    if flask.request.method != "POST":
        return None

    with self.get_current_iqr_session() as iqrs:
        fid = flask.request.form['fid']

        self._log.debug("[%s::%s] Getting temporary filepath from "
                        "uploader module", iqrs.uuid, fid)
        upload_filepath = self.mod_upload.get_path_for_id(fid)
        self.mod_upload.clear_completed(fid)

        self._log.debug("[%s::%s] Moving uploaded file",
                        iqrs.uuid, fid)
        sess_upload = osp.join(self._iqr_work_dirs[iqrs.uuid],
                               osp.basename(upload_filepath))
        os.rename(upload_filepath, sess_upload)

        upload_data = DataFileElement(sess_upload)
        uuid = upload_data.uuid()
        self._iqr_example_data[iqrs.uuid][uuid] = upload_data

        # Extend session ingest -- modifying
        self._log.debug("[%s::%s] Adding new data to session "
                        "positives", iqrs.uuid, fid)
        # Keep the try body limited to the call whose ValueError is
        # reported back to the client as a 400.
        try:
            upload_descr = self._descriptor_generator.compute_descriptor(
                upload_data, self._descr_elem_factory
            )
        except ValueError as ex:
            # Portable spelling; the ``except ValueError, ex`` form is a
            # syntax error on Python 3.
            return "Input Error: %s" % str(ex), 400

        self._iqr_example_pos_descr[iqrs.uuid][uuid] = upload_descr
        iqrs.adjudicate((upload_descr,))

        return str(uuid)
def iqr_ingest_file():
    """
    Move an uploaded file into the current session's work directory,
    record it as example data, and post its content to the IQR service's
    ``add_external_pos`` endpoint.

    ``self`` is not a parameter of this function; it is resolved from the
    enclosing scope. The file is identified by the ``fid`` form field of
    the current request.

    :return: string of data/descriptor element's UUID
    :rtype: str

    """
    # TODO: Add status dict with a "GET" method branch for getting that
    #       status information.
    file_id = flask.request.form['fid']
    session_id = self.get_current_iqr_session()

    self._log.debug(
        "[%s::%s] Getting temporary filepath from uploader module",
        session_id, file_id
    )
    tmp_path = self.mod_upload.get_path_for_id(file_id)
    self.mod_upload.clear_completed(file_id)

    self._log.debug("[%s::%s] Moving uploaded file", session_id, file_id)
    work_dir = self._iqr_work_dirs[session_id]
    session_path = osp.join(work_dir, osp.basename(tmp_path))
    os.rename(tmp_path, session_path)

    # Record uploaded data as user example data for this session.
    element = DataFileElement(session_path)
    element_uuid = element.uuid()
    self._iqr_example_data[session_id][element_uuid] = element

    # Extend session ingest -- modifying
    self._log.debug(
        "[%s::%s] Adding new data to session external positives",
        session_id, file_id
    )
    # The content is sent base64-encoded together with its content type.
    response = self._iqr_service.post(
        'add_external_pos',
        sid=session_id,
        base64=base64.b64encode(element.get_bytes()),
        content_type=element.content_type(),
    )
    # Raise if the service reported an error status.
    response.raise_for_status()

    return str(element_uuid)
def iqr_ingest_file():
    """
    Ingest an uploaded file into the current IQR session.

    The upload named by the ``fid`` form field is relocated into the
    session's work directory, stored in ``self._iqr_example_data``, and
    its bytes are posted (base64-encoded) to the ``add_external_pos``
    endpoint of ``self._iqr_service``.

    ``self`` is not a parameter of this function; it is resolved from the
    enclosing scope.

    :return: string of data/descriptor element's UUID
    :rtype: str

    """
    # TODO: Add status dict with a "GET" method branch for getting that
    #       status information.
    fid = flask.request.form['fid']
    sid = self.get_current_iqr_session()

    log_debug = self._log.debug

    log_debug("[%s::%s] Getting temporary filepath from uploader module",
              sid, fid)
    src_path = self.mod_upload.get_path_for_id(fid)
    self.mod_upload.clear_completed(fid)

    log_debug("[%s::%s] Moving uploaded file", sid, fid)
    dst_path = osp.join(self._iqr_work_dirs[sid], osp.basename(src_path))
    os.rename(src_path, dst_path)

    # Record uploaded data as user example data for this session.
    data_elem = DataFileElement(dst_path)
    data_uuid = data_elem.uuid()
    self._iqr_example_data[sid][data_uuid] = data_elem

    # Extend session ingest -- modifying
    log_debug("[%s::%s] Adding new data to session external positives",
              sid, fid)
    encoded = base64.b64encode(data_elem.get_bytes())
    mimetype = data_elem.content_type()
    resp = self._iqr_service.post('add_external_pos', sid=sid,
                                  base64=encoded, content_type=mimetype)
    # Raise if the service reported an error status.
    resp.raise_for_status()

    return str(data_uuid)
def classify_files(config, label, file_globs):
    """
    Classify the given files and print the paths of those whose
    classification's ``max_label()`` equals ``label``.

    If ``label`` is ``None``, or is not one of the classifier's labels,
    the available labels are logged and the function returns without
    classifying anything.

    :param config: Configuration mapping. The keys ``'classifier'``,
        ``'descriptor_factory'``, ``'descriptor_generator'`` and
        ``'classification_factory'`` are read from it.
    :param label: Classification label to filter on, or ``None`` to only
        list the available labels.
    :param file_globs: Iterable of file paths and/or glob patterns.
        Entries that are existing files are used as-is; everything else is
        expanded with ``glob.iglob``.

    :raises RuntimeError: No files resulted from ``file_globs``.

    :return: None. Matching file paths are printed to standard output.

    """
    log = logging.getLogger(__name__)

    #: :type: smqtk.algorithms.Classifier
    classifier = \
        plugin.from_plugin_config(config['classifier'],
                                  get_classifier_impls())

    def log_available_labels():
        log.info("Available classifier labels:")
        for l in classifier.get_labels():
            log.info("- %s", l)

    if label is None:
        log_available_labels()
        return
    elif label not in classifier.get_labels():
        log.error("Invalid classification label provided to compute and "
                  "filter on: '%s'", label)
        log_available_labels()
        return

    log.info("Collecting files from globs")
    #: :type: list[DataFileElement]
    data_elements = []
    # NOTE(review): if two input files yield the same element UUID, the
    # later path replaces the earlier one in this map -- confirm whether
    # that can happen for DataFileElement.
    uuid2filepath = {}
    for g in file_globs:
        if os.path.isfile(g):
            d = DataFileElement(g)
            data_elements.append(d)
            uuid2filepath[d.uuid()] = g
        else:
            log.debug("expanding glob: %s", g)
            for fp in glob.iglob(g):
                d = DataFileElement(fp)
                data_elements.append(d)
                uuid2filepath[d.uuid()] = fp
    if not data_elements:
        raise RuntimeError("No files provided for classification.")

    log.info("Computing descriptors")
    descriptor_factory = \
        DescriptorElementFactory.from_config(config['descriptor_factory'])
    #: :type: smqtk.algorithms.DescriptorGenerator
    descriptor_generator = \
        plugin.from_plugin_config(config['descriptor_generator'],
                                  get_descriptor_generator_impls())
    descr_map = descriptor_generator\
        .compute_descriptor_async(data_elements, descriptor_factory)

    log.info("Classifying descriptors")
    classification_factory = ClassificationElementFactory \
        .from_config(config['classification_factory'])
    # Materialise the values so a concrete list is passed on both
    # Python 2 and Python 3 (where ``values()`` is a view).
    classification_map = classifier\
        .classify_async(list(descr_map.values()), classification_factory)

    log.info("Printing input file paths that classified as the given label.")
    # Map of UUID to classification element. ``values()`` exists on both
    # Python 2 and 3, unlike ``itervalues()``.
    uuid2c = {c.uuid: c for c in classification_map.values()}
    for data in data_elements:
        d_uuid = data.uuid()
        if uuid2c[d_uuid].max_label() == label:
            # Single-argument call form behaves the same as the Python 2
            # print statement and is valid on Python 3.
            print(uuid2filepath[d_uuid])
def classify_files(config, label, file_globs):
    """
    Compute descriptors and classifications for the given files, then
    print each input path whose classification's ``max_label()`` equals
    ``label``.

    When ``label`` is ``None`` or is not among the classifier's labels,
    the available labels are logged (preceded by an error message in the
    latter case) and nothing is classified.

    :param config: Configuration mapping; the ``'classifier'``,
        ``'descriptor_factory'``, ``'descriptor_generator'`` and
        ``'classification_factory'`` entries are read.
    :param label: Label to filter on, or ``None`` to only list labels.
    :param file_globs: Iterable of file paths and/or glob patterns.

    :raises RuntimeError: No files resulted from ``file_globs``.

    :return: None. Matching paths go to standard output.

    """
    log = logging.getLogger(__name__)

    #: :type: smqtk.algorithms.Classifier
    classifier = plugin.from_plugin_config(
        config['classifier'], get_classifier_impls()
    )

    def log_avaialable_labels():
        log.info("Available classifier labels:")
        for known_label in classifier.get_labels():
            log.info("- %s", known_label)

    def iter_filepaths(entry):
        # Existing files are used directly; anything else is treated as a
        # glob pattern and expanded lazily.
        if os.path.isfile(entry):
            yield entry
        else:
            log.debug("expanding glob: %s", entry)
            for expanded in glob.iglob(entry):
                yield expanded

    # Guard clauses: nothing to classify without a valid label.
    if label is None:
        log_avaialable_labels()
        return
    if label not in classifier.get_labels():
        log.error("Invalid classification label provided to compute and "
                  "filter on: '%s'", label)
        log_avaialable_labels()
        return

    log.info("Collecting files from globs")
    #: :type: list[DataFileElement]
    data_elements = []
    uuid2filepath = {}
    for entry in file_globs:
        for filepath in iter_filepaths(entry):
            element = DataFileElement(filepath)
            data_elements.append(element)
            uuid2filepath[element.uuid()] = filepath
    if not data_elements:
        raise RuntimeError("No files provided for classification.")

    log.info("Computing descriptors")
    descriptor_factory = DescriptorElementFactory.from_config(
        config['descriptor_factory']
    )
    #: :type: smqtk.algorithms.DescriptorGenerator
    descriptor_generator = plugin.from_plugin_config(
        config['descriptor_generator'], get_descriptor_generator_impls()
    )
    descr_map = descriptor_generator.compute_descriptor_async(
        data_elements, descriptor_factory
    )

    log.info("Classifying descriptors")
    classification_factory = ClassificationElementFactory.from_config(
        config['classification_factory']
    )
    descriptors = list(descr_map.values())
    classification_map = classifier.classify_async(
        descriptors, classification_factory
    )

    log.info("Printing input file paths that classified as the given label.")
    # Map of UUID to classification element.
    uuid2c = {c.uuid: c for c in six.itervalues(classification_map)}
    for element in data_elements:
        element_uuid = element.uuid()
        filepath = uuid2filepath[element_uuid]
        classification = uuid2c[element_uuid]
        log.debug("'{}' classification map: {}".format(
            filepath, classification.get_classification()
        ))
        if classification.max_label() == label:
            print(filepath)