Ejemplo n.º 1
0
        def iqr_ingest_file():
            """
            Ingest an uploaded file into the current IQR session.

            On a POST request, the ``fid`` form value is used to fetch the
            uploaded file's temporary path from the uploader module. The file
            is moved into the session's working directory, recorded as example
            data for the session, and a descriptor is computed for it. That
            descriptor is recorded as an example positive descriptor and
            passed to the session's ``adjudicate`` method.

            :return: String form of the data element's UUID on success, or an
                ``(error message, 400)`` pair when descriptor computation
                raises a ``ValueError``. Nothing is returned for requests
                that are not a POST.
            :rtype: str | (str, int) | None

            """
            # TODO: Add status dict with a "GET" method branch for getting that
            #       status information.

            # Start the ingest of a FID when POST
            if flask.request.method == "POST":
                with self.get_current_iqr_session() as iqrs:
                    fid = flask.request.form['fid']

                    self._log.debug(
                        "[%s::%s] Getting temporary filepath from "
                        "uploader module", iqrs.uuid, fid)
                    upload_filepath = self.mod_upload.get_path_for_id(fid)
                    self.mod_upload.clear_completed(fid)

                    # Relocate the upload into this session's work directory,
                    # keeping the uploaded file's base name.
                    self._log.debug("[%s::%s] Moving uploaded file", iqrs.uuid,
                                    fid)
                    sess_upload = osp.join(self._iqr_work_dirs[iqrs.uuid],
                                           osp.basename(upload_filepath))
                    os.rename(upload_filepath, sess_upload)

                    # Record the file as example data for this session.
                    upload_data = DataFileElement(sess_upload)
                    uuid = upload_data.uuid()
                    self._iqr_example_data[iqrs.uuid][uuid] = upload_data

                    # Extend session ingest -- modifying
                    self._log.debug(
                        "[%s::%s] Adding new data to session "
                        "positives", iqrs.uuid, fid)
                    try:
                        upload_descr = \
                            self._descriptor_generator.compute_descriptor(
                                upload_data, self._descr_elem_factory
                            )
                    except ValueError as ex:
                        # ``except ... as ...`` is valid on Python 2.6+ and 3.
                        # NOTE(review): the example-data entry recorded above
                        # is left in place on this failure path -- confirm
                        # that this is intended.
                        return "Input Error: %s" % str(ex), 400

                    self._iqr_example_pos_descr[iqrs.uuid][uuid] = upload_descr
                    iqrs.adjudicate((upload_descr, ))

                    return str(uuid)
Ejemplo n.º 2
0
        def iqr_ingest_file():
            """
            Move an uploaded file into the current IQR session's work
            directory and add it to that session as positive data.

            Only POST requests are acted upon. The uploaded file is identified
            by the ``fid`` form value, which is used to look up its temporary
            path in the uploader module.

            :return: String form of the ingested data element's UUID
                (``None`` when the request is not a POST).
            :rtype: str

            """
            # TODO: Add status dict with a "GET" method branch for getting that
            #       status information.

            # Ingest of a FID is only started for POST requests.
            if flask.request.method != "POST":
                return None

            session = self.get_current_iqr_session()
            file_id = flask.request.form['fid']

            self.log.debug(
                "[%s::%s] Getting temporary filepath from uploader module",
                session.uuid, file_id
            )
            tmp_path = self.mod_upload.get_path_for_id(file_id)
            self.mod_upload.clear_completed(file_id)

            self.log.debug(
                "[%s::%s] Moving uploaded file", session.uuid, file_id
            )
            # Destination keeps the uploaded file's base name.
            dest_path = osp.join(session.work_dir, osp.basename(tmp_path))
            os.rename(tmp_path, dest_path)
            element = DataFileElement(dest_path)
            # Result is discarded here; presumably this primes the element's
            # UUID before it is handed to the session -- TODO confirm.
            element.uuid()

            # Extend session ingest -- modifying
            self.log.debug(
                "[%s::%s] Adding new data to session positives",
                session.uuid, file_id
            )
            session.add_positive_data(element)

            return str(element.uuid())
Ejemplo n.º 3
0
        def iqr_ingest_file():
            """
            Relocate an uploaded file into the working directory of the
            current IQR session and register it with the session as positive
            data.

            Nothing is done unless the request is a POST. The ``fid`` form
            value names the upload whose temporary path is fetched from the
            uploader module.

            :return: String form of the ingested data element's UUID
                (``None`` when the request is not a POST).
            :rtype: str

            """
            # TODO: Add status dict with a "GET" method branch for getting that
            #       status information.

            # Ingest of a FID is only started for POST requests.
            if flask.request.method != "POST":
                return None

            current_sess = self.get_current_iqr_session()
            upload_id = flask.request.form['fid']

            self.log.debug(
                "[%s::%s] Getting temporary filepath from uploader module",
                current_sess.uuid, upload_id
            )
            source_path = self.mod_upload.get_path_for_id(upload_id)
            self.mod_upload.clear_completed(upload_id)

            self.log.debug(
                "[%s::%s] Moving uploaded file", current_sess.uuid, upload_id
            )
            # The file keeps its base name inside the session work directory.
            target_path = osp.join(current_sess.work_dir,
                                   osp.basename(source_path))
            os.rename(source_path, target_path)
            data_elem = DataFileElement(target_path)
            # Result is discarded here; presumably this primes the element's
            # UUID before it is handed to the session -- TODO confirm.
            data_elem.uuid()

            # Extend session ingest -- modifying
            self.log.debug(
                "[%s::%s] Adding new data to session positives",
                current_sess.uuid, upload_id
            )
            current_sess.add_positive_data(data_elem)

            return str(data_elem.uuid())
Ejemplo n.º 4
0
        def iqr_ingest_file():
            """
            Ingest an uploaded file into the current IQR session.

            On a POST request, the ``fid`` form value is used to fetch the
            uploaded file's temporary path from the uploader module. The file
            is moved into the session's working directory, recorded as example
            data for the session, and a descriptor is computed for it. That
            descriptor is recorded as an example positive descriptor and
            passed to the session's ``adjudicate`` method.

            :return: String form of the data element's UUID on success, or an
                ``(error message, 400)`` pair when descriptor computation
                raises a ``ValueError``. Nothing is returned for requests
                that are not a POST.
            :rtype: str | (str, int) | None

            """
            # TODO: Add status dict with a "GET" method branch for getting that
            #       status information.

            # Start the ingest of a FID when POST
            if flask.request.method == "POST":
                with self.get_current_iqr_session() as iqrs:
                    fid = flask.request.form['fid']

                    self._log.debug("[%s::%s] Getting temporary filepath from "
                                    "uploader module", iqrs.uuid, fid)
                    upload_filepath = self.mod_upload.get_path_for_id(fid)
                    self.mod_upload.clear_completed(fid)

                    # Relocate the upload into this session's work directory,
                    # keeping the uploaded file's base name.
                    self._log.debug("[%s::%s] Moving uploaded file",
                                    iqrs.uuid, fid)
                    sess_upload = osp.join(self._iqr_work_dirs[iqrs.uuid],
                                           osp.basename(upload_filepath))
                    os.rename(upload_filepath, sess_upload)

                    # Record the file as example data for this session.
                    upload_data = DataFileElement(sess_upload)
                    uuid = upload_data.uuid()
                    self._iqr_example_data[iqrs.uuid][uuid] = upload_data

                    # Extend session ingest -- modifying
                    self._log.debug("[%s::%s] Adding new data to session "
                                    "positives", iqrs.uuid, fid)
                    try:
                        upload_descr = \
                            self._descriptor_generator.compute_descriptor(
                                upload_data, self._descr_elem_factory
                            )
                    except ValueError as ex:
                        # ``except ... as ...`` is valid on Python 2.6+ and 3.
                        # NOTE(review): the example-data entry recorded above
                        # is left in place on this failure path -- confirm
                        # that this is intended.
                        return "Input Error: %s" % str(ex), 400

                    self._iqr_example_pos_descr[iqrs.uuid][uuid] = upload_descr
                    iqrs.adjudicate((upload_descr,))

                    return str(uuid)
Ejemplo n.º 5
0
        def iqr_ingest_file():
            """
            Take an uploaded file, store it under the current IQR session's
            work directory and submit its content to the IQR service as an
            external positive example.

            The upload is identified by the ``fid`` form value, which is used
            to fetch the temporary file path from the uploader module.

            :return: string of data/descriptor element's UUID
            :rtype: str

            """
            # TODO: Add status dict with a "GET" method branch for getting that
            #       status information.

            file_id = flask.request.form['fid']
            session_id = self.get_current_iqr_session()

            self._log.debug(
                "[%s::%s] Getting temporary filepath from uploader module",
                session_id, file_id
            )
            tmp_path = self.mod_upload.get_path_for_id(file_id)
            self.mod_upload.clear_completed(file_id)

            self._log.debug(
                "[%s::%s] Moving uploaded file", session_id, file_id
            )
            # The file keeps its base name inside the session work directory.
            session_path = osp.join(self._iqr_work_dirs[session_id],
                                    osp.basename(tmp_path))
            os.rename(tmp_path, session_path)

            # Keep the uploaded data as user example data for this session,
            # keyed by the element's UUID.
            element = DataFileElement(session_path)
            element_uuid = element.uuid()
            self._iqr_example_data[session_id][element_uuid] = element

            # Extend session ingest -- modifying
            self._log.debug(
                "[%s::%s] Adding new data to session external positives",
                session_id, file_id
            )
            encoded_bytes = base64.b64encode(element.get_bytes())
            mime_type = element.content_type()
            response = self._iqr_service.post(
                'add_external_pos',
                sid=session_id,
                base64=encoded_bytes,
                content_type=mime_type,
            )
            # Raise if the service reported an error for the request.
            response.raise_for_status()

            return str(element_uuid)
Ejemplo n.º 6
0
        def iqr_ingest_file():
            """
            Move an uploaded file into the current IQR session's work
            directory, remember it as example data, and post its
            base64-encoded bytes to the IQR service as an external positive.

            The ``fid`` form value selects the upload; its temporary path is
            obtained from the uploader module.

            :return: string of data/descriptor element's UUID
            :rtype: str

            """
            # TODO: Add status dict with a "GET" method branch for getting that
            #       status information.

            upload_id = flask.request.form['fid']
            iqr_sid = self.get_current_iqr_session()

            self._log.debug(
                "[%s::%s] Getting temporary filepath from uploader module",
                iqr_sid, upload_id
            )
            source_path = self.mod_upload.get_path_for_id(upload_id)
            self.mod_upload.clear_completed(upload_id)

            self._log.debug("[%s::%s] Moving uploaded file", iqr_sid, upload_id)
            # Target path reuses the uploaded file's base name.
            target_path = osp.join(
                self._iqr_work_dirs[iqr_sid], osp.basename(source_path)
            )
            os.rename(source_path, target_path)

            # Store the uploaded data as user example data for this session,
            # keyed by the element's UUID.
            data_elem = DataFileElement(target_path)
            data_uuid = data_elem.uuid()
            self._iqr_example_data[iqr_sid][data_uuid] = data_elem

            # Extend session ingest -- modifying
            self._log.debug(
                "[%s::%s] Adding new data to session external positives",
                iqr_sid, upload_id
            )
            payload_b64 = base64.b64encode(data_elem.get_bytes())
            payload_type = data_elem.content_type()
            reply = self._iqr_service.post(
                'add_external_pos',
                sid=iqr_sid,
                base64=payload_b64,
                content_type=payload_type,
            )
            # Raise if the service reported an error for the request.
            reply.raise_for_status()

            return str(data_uuid)
Ejemplo n.º 7
0
def classify_files(config, label, file_globs):
    """
    Classify the given files and print the paths of those whose maximum
    classification label equals ``label``.

    :param config: Configuration mapping. The keys ``classifier``,
        ``descriptor_factory``, ``descriptor_generator`` and
        ``classification_factory`` are read.
    :param label: Classification label to filter on. If ``None``, or not one
        of the classifier's labels, the available labels are logged and the
        function returns without classifying anything.
    :param file_globs: Iterable of file paths and/or glob patterns. Entries
        that are existing files are used directly; all others are expanded
        as glob patterns.

    :raises RuntimeError: No files resulted from the given paths/globs.

    """
    log = logging.getLogger(__name__)

    #: :type: smqtk.algorithms.Classifier
    classifier = \
        plugin.from_plugin_config(config['classifier'],
                                  get_classifier_impls())

    def log_available_labels():
        log.info("Available classifier labels:")
        for l in classifier.get_labels():
            log.info("- %s", l)

    if label is None:
        log_available_labels()
        return
    elif label not in classifier.get_labels():
        log.error(
            "Invalid classification label provided to compute and filter "
            "on: '%s'", label)
        log_available_labels()
        return

    log.info("Collecting files from globs")
    #: :type: list[DataFileElement]
    data_elements = []
    # Map of data element UUID to the file path it was created from.
    uuid2filepath = {}
    for g in file_globs:
        if os.path.isfile(g):
            d = DataFileElement(g)
            data_elements.append(d)
            uuid2filepath[d.uuid()] = g
        else:
            log.debug("expanding glob: %s", g)
            for fp in glob.iglob(g):
                d = DataFileElement(fp)
                data_elements.append(d)
                uuid2filepath[d.uuid()] = fp
    if not data_elements:
        raise RuntimeError("No files provided for classification.")

    log.info("Computing descriptors")
    descriptor_factory = \
        DescriptorElementFactory.from_config(config['descriptor_factory'])
    #: :type: smqtk.algorithms.DescriptorGenerator
    descriptor_generator = \
        plugin.from_plugin_config(config['descriptor_generator'],
                                  get_descriptor_generator_impls())
    descr_map = descriptor_generator\
        .compute_descriptor_async(data_elements, descriptor_factory)

    log.info("Classifying descriptors")
    classification_factory = ClassificationElementFactory \
        .from_config(config['classification_factory'])
    # ``list(...)`` so a concrete list (not a dict view) is passed on
    # Python 3 as well as Python 2.
    classification_map = classifier\
        .classify_async(list(descr_map.values()), classification_factory)

    log.info("Printing input file paths that classified as the given label.")
    # Map of UUID to classification element. ``values()`` is used instead of
    # the Python 2-only ``itervalues()``.
    uuid2c = dict((c.uuid, c) for c in classification_map.values())
    for data in data_elements:
        d_uuid = data.uuid()
        if uuid2c[d_uuid].max_label() == label:
            # Single-argument call form prints identically on Python 2 and 3.
            print(uuid2filepath[d_uuid])
Ejemplo n.º 8
0
def classify_files(config, label, file_globs):
    """
    Compute descriptors and classifications for the given files, then print
    the path of every file whose maximum classification label equals
    ``label``.

    :param config: Configuration mapping. The keys ``classifier``,
        ``descriptor_factory``, ``descriptor_generator`` and
        ``classification_factory`` are read.
    :param label: Classification label to filter on. If ``None``, or not one
        of the classifier's labels, the available labels are logged and the
        function returns without classifying anything.
    :param file_globs: Iterable of file paths and/or glob patterns. Entries
        that are existing files are used directly; all others are expanded
        as glob patterns.

    :raises RuntimeError: No files resulted from the given paths/globs.

    """
    log = logging.getLogger(__name__)

    #: :type: smqtk.algorithms.Classifier
    classifier = plugin.from_plugin_config(
        config['classifier'], get_classifier_impls()
    )

    def report_labels():
        # List every label the classifier knows about, one per log line.
        log.info("Available classifier labels:")
        for known_label in classifier.get_labels():
            log.info("- %s", known_label)

    # No label requested: only report what is available.
    if label is None:
        report_labels()
        return
    # Unknown label: flag the error, then report what is available.
    if label not in classifier.get_labels():
        log.error("Invalid classification label provided to compute and filter "
                  "on: '%s'", label)
        report_labels()
        return

    log.info("Collecting files from globs")
    #: :type: list[DataFileElement]
    data_elements = []
    # Map of data element UUID to the file path it was created from.
    uuid2filepath = {}
    for entry in file_globs:
        if os.path.isfile(entry):
            # A concrete file path is taken as-is.
            matched_paths = (entry,)
        else:
            log.debug("expanding glob: %s", entry)
            matched_paths = glob.iglob(entry)
        for path in matched_paths:
            element = DataFileElement(path)
            data_elements.append(element)
            uuid2filepath[element.uuid()] = path
    if not data_elements:
        raise RuntimeError("No files provided for classification.")

    log.info("Computing descriptors")
    descriptor_factory = DescriptorElementFactory.from_config(
        config['descriptor_factory']
    )
    #: :type: smqtk.algorithms.DescriptorGenerator
    descriptor_generator = plugin.from_plugin_config(
        config['descriptor_generator'], get_descriptor_generator_impls()
    )
    descr_map = descriptor_generator.compute_descriptor_async(
        data_elements, descriptor_factory
    )

    log.info("Classifying descriptors")
    classification_factory = ClassificationElementFactory.from_config(
        config['classification_factory']
    )
    descriptors = list(descr_map.values())
    classification_map = classifier.classify_async(
        descriptors, classification_factory
    )

    log.info("Printing input file paths that classified as the given label.")
    # Map of UUID to classification element.
    uuid2c = {
        c_elem.uuid: c_elem
        for c_elem in six.itervalues(classification_map)
    }
    for element in data_elements:
        elem_uuid = element.uuid()
        elem_path = uuid2filepath[elem_uuid]
        elem_class = uuid2c[elem_uuid]
        log.debug("'{}' classification map: {}".format(
            elem_path, elem_class.get_classification()
        ))
        if elem_class.max_label() == label:
            print(elem_path)