Example #1
0
def test():
    """Smoke-test driver: push a 15-item Process through a Batch.

    Replaces the module-level ``Configurator`` with ``fake_config``, then
    reuses the pipeline named 'test4' in workspace pk=1.  When that pipeline
    does not exist it is created from ``test_pipe2`` and bound to the "test"
    trigger.  A Process is created for the pipeline, fifteen ``item<n>``
    parameter sets are added to it and a Batch is started on it.

    ``end_test`` is attached (via ``addBoth``) to the object returned by
    ``batch.run()``.  If starting the batch raises, the error is logged and
    the reactor is stopped.
    """
    global Configurator
    Configurator = fake_config
    ws = DAMWorkspace.objects.get(pk=1)
    user = User.objects.get(username='******')
    t = TriggerEvent.objects.get_or_create(name="test")[0]
    print('t.pk= %s' % (t.pk,))
    try:
        print('attempting to reuse pipeline')
        pipeline = Pipeline.objects.get(name='test4', workspace=ws)
    except Pipeline.DoesNotExist:
        # Only a missing pipeline triggers creation.  The previous bare
        # ``except:`` also swallowed database errors, duplicate rows and
        # KeyboardInterrupt, and then created yet another pipeline.
        print('creating new pipeline')
        pipeline = Pipeline.objects.create(name="test4",
                                           description='',
                                           params=dumps(test_pipe2),
                                           workspace=ws)
        print('ok')
        pipeline.triggers.add(t)
        print('ok2')
        pipeline.save()
        print('done')
    process = Process.objects.create(pipeline=pipeline,
                                     workspace=ws,
                                     launched_by=user)
    for n in xrange(15):
        print('adding target %d' % n)
        process.add_params(item='item%d' % n)
    try:
        batch = Batch(process)
        d = batch.run()
        d.addBoth(end_test, process)
    except Exception as e:
        log.error("Fatal initialization error: %s" % str(e))
        reactor.stop()
Example #2
0
    def _iterate(self):
        """ Run the actions listed in schedule on the items returned by _new_batch

        Does nothing once ``self.gameover`` is set.  Otherwise asks
        ``_get_action`` for the next (action, task) pair and, when an action
        is available, calls the script registered for it in ``self.scripts``
        with the process workspace, the item's ``target_id`` and the merged
        parameters.

        Parameter merging, in order:
          1. start from a copy of the script's own parameters;
          2. fill values that are blank ('') from the item's parameters
             stored under the exact action name;
          3. apply the item's '*' parameters;
          4. apply the item's parameters stored under the leading
             ``[a-z_]+`` prefix of the action name.

        The object returned by the script gets ``_handle_ok`` and
        ``_handle_err`` attached through ``addCallbacks``.  A synchronous
        failure is logged and reported to ``_handle_err`` directly.
        """
        if self.gameover:
            log.debug('_iterate: gameover')
            return
        action, task = self._get_action()
        if action:
            item, schedule = task['item'], task['schedule']
            method, script_params = self.scripts[action]
            # Work on a private copy: the dict stored in self.scripts is
            # shared by every item, so mutating it would leak one item's
            # values into the next item processed with the same action.
            params = dict(script_params)
            # Count the action before anything can fail.  _handle_err always
            # decrements self.outstanding, so incrementing late would leave
            # the counter unbalanced when parameter preparation raises.
            self.outstanding += 1
            try:
                item_params = loads(item.params)

                # Fill blank script parameters from the item's
                # action-specific values; the item may have none.
                action_params = item_params.get(action, {})
                for k in params.keys():
                    if params[k] == '' and (k in action_params):
                        params[k] = action_params[k]

                params.update(item_params.get('*', {}))
                x = re.compile('^[a-z_]+')  # cut out digits from action name
                params.update(item_params.get(x.match(action).group(), {}))
                d = method(self.process.workspace, item.target_id, **params)
            except Exception as e:
                log.error('ERROR in %s: %s %s' %
                          (str(method), type(e), str(e)))
                self._handle_err(str(e), item, schedule, action, params)
            else:
                d.addCallbacks(self._handle_ok,
                               self._handle_err,
                               callbackArgs=[item, schedule, action, params],
                               errbackArgs=[item, schedule, action, params])
Example #3
0
    def _iterate(self):
        """ Run the actions listed in schedule on the items returned by _new_batch

        Does nothing once ``self.gameover`` is set.  Otherwise asks
        ``_get_action`` for the next (action, task) pair and, when an action
        is available, calls the script registered for it in ``self.scripts``
        with the process workspace, the item's ``target_id`` and the merged
        parameters.

        Parameter merging, in order:
          1. start from a copy of the script's own parameters;
          2. fill values that are blank ('') from the item's parameters
             stored under the exact action name;
          3. apply the item's '*' parameters;
          4. apply the item's parameters stored under the leading
             ``[a-z_]+`` prefix of the action name.

        The object returned by the script gets ``_handle_ok`` and
        ``_handle_err`` attached through ``addCallbacks``.  A synchronous
        failure is logged and reported to ``_handle_err`` directly.
        """
        if self.gameover:
            log.debug('_iterate: gameover')
            return
        action, task = self._get_action()
        if action:
            item, schedule = task['item'], task['schedule']
            method, script_params = self.scripts[action]
            # Work on a private copy: the dict stored in self.scripts is
            # shared by every item, so mutating it would leak one item's
            # values into the next item processed with the same action.
            params = dict(script_params)
            # Count the action before anything can fail.  _handle_err always
            # decrements self.outstanding, so incrementing late would leave
            # the counter unbalanced when parameter preparation raises.
            self.outstanding += 1
            try:
                item_params = loads(item.params)

                # Fill blank script parameters from the item's
                # action-specific values; the item may have none.
                action_params = item_params.get(action, {})
                for k in params.keys():
                    if params[k] == '' and (k in action_params):
                        params[k] = action_params[k]

                params.update(item_params.get('*', {}))
                x = re.compile('^[a-z_]+')  # cut out digits from action name
                params.update(item_params.get(x.match(action).group(), {}))
                d = method(self.process.workspace, item.target_id, **params)
            except Exception as e:
                log.error('ERROR in %s: %s %s' % (str(method), type(e), str(e)))
                self._handle_err(str(e), item, schedule, action, params)
            else:
                d.addCallbacks(self._handle_ok, self._handle_err,
                               callbackArgs=[item, schedule, action, params],
                               errbackArgs=[item, schedule, action, params])
Example #4
0
def test():
    """Smoke-test driver: push a 15-item Process through a Batch.

    Replaces the module-level ``Configurator`` with ``fake_config``, then
    reuses the pipeline named 'test4' in workspace pk=1.  When that pipeline
    does not exist it is created from ``test_pipe2`` and bound to the "test"
    trigger.  A Process is created for the pipeline, fifteen ``item<n>``
    parameter sets are added to it and a Batch is started on it.

    ``end_test`` is attached (via ``addBoth``) to the object returned by
    ``batch.run()``.  If starting the batch raises, the error is logged and
    the reactor is stopped.
    """
    global Configurator
    Configurator = fake_config
    ws = DAMWorkspace.objects.get(pk=1)
    user = User.objects.get(username='******')
    t = TriggerEvent.objects.get_or_create(name="test")[0]
    print('t.pk= %s' % (t.pk,))
    try:
        print('attempting to reuse pipeline')
        pipeline = Pipeline.objects.get(name='test4', workspace=ws)
    except Pipeline.DoesNotExist:
        # Only a missing pipeline triggers creation.  The previous bare
        # ``except:`` also swallowed database errors, duplicate rows and
        # KeyboardInterrupt, and then created yet another pipeline.
        print('creating new pipeline')
        pipeline = Pipeline.objects.create(name="test4",
                                           description='',
                                           params=dumps(test_pipe2),
                                           workspace=ws)
        print('ok')
        pipeline.triggers.add(t)
        print('ok2')
        pipeline.save()
        print('done')
    process = Process.objects.create(pipeline=pipeline,
                                     workspace=ws,
                                     launched_by=user)
    for n in xrange(15):
        print('adding target %d' % n)
        process.add_params(item='item%d' % n)
    try:
        batch = Batch(process)
        d = batch.run()
        d.addBoth(end_test, process)
    except Exception as e:
        log.error("Fatal initialization error: %s" % str(e))
        reactor.stop()
Example #5
0
 def handle_result(self, result, *args):
     """Parse the command output and fire ``self.deferred`` with the outcome.

     ``result['data']`` and ``*args`` are handed to ``parse_stdout``; the
     value it returns is delivered through ``self.deferred.callback``.  Any
     exception raised in the process is logged and passed to
     ``self.deferred.errback`` instead.
     """
     try:
         parsed = self.parse_stdout(result['data'], *args)
         self.deferred.callback(parsed)
     except Exception as err:
         owner = self.__class__.__name__
         log.error('Error in %s: %s %s' % (owner, type(err), str(err)))
         self.deferred.errback(err)
Example #6
0
 def execute(self, **params):     
     """Prepare the command line for this action.

     ``params`` are forwarded unchanged to ``self.get_cmdline``; afterwards
     ``self.cmdline`` is split into an argument list with ``splitstring``.
     Any exception raised while doing so is logged together with the class
     name and passed to ``self.deferred.errback``.

     NOTE(review): ``args`` is not used in the lines visible here, and the
     method does not return after ``errback`` -- this excerpt presumably
     continues beyond the ``except`` clause; confirm against the full source.
     """
     # get basic data (avoid creating stuff in DB)
     try:
         # presumably get_cmdline() populates self.cmdline, which is read on
         # the next line -- verify in the subclass implementations
         self.get_cmdline(**params)
         args = splitstring(self.cmdline)
     except Exception, e:
         log.error('Error in %s: %s %s' % (self.__class__.__name__, type(e), str(e)))
         self.deferred.errback(e)
Example #7
0
 def _save_features(self, features, extractor_type):
     """Save results of basic extractions.

     In the lines visible here only the component format is stored: the
     content type of ``self.source`` is looked up and handed, together with
     ``self.source``, to ``save_type``.  A failure of ``save_type`` is
     logged and otherwise ignored (best effort).

     NOTE(review): ``features``, ``extractor_type``, ``metadata_list`` and
     ``delete_list`` are not used in this excerpt; the method presumably
     continues -- confirm against the full source.
     """
     metadata_list, delete_list = [], []
     #log.debug('ExtractBasicFeatures._save_features: %s' % features)
     ctype = ContentType.objects.get_for_model(self.source)
     try:
         save_type(ctype, self.source)
     except Exception, e:
         # best effort: a failure here is logged, not propagated
         log.error("Failed to save component format as DC:Format: %s" % (str(e)))
Example #8
0
 def _save_features(self, features, extractor_type):
     """Save results of basic extractions.

     In the lines visible here only the component format is stored: the
     content type of ``self.source`` is looked up and handed, together with
     ``self.source``, to ``save_type``.  A failure of ``save_type`` is
     logged and otherwise ignored (best effort).

     NOTE(review): ``features``, ``extractor_type``, ``metadata_list`` and
     ``delete_list`` are not used in this excerpt; the method presumably
     continues -- confirm against the full source.
     """
     metadata_list, delete_list = [], []
     #log.debug('ExtractBasicFeatures._save_features: %s' % features)
     ctype = ContentType.objects.get_for_model(self.source)
     try:
         save_type(ctype, self.source)
     except Exception, e:
         # best effort: a failure here is logged, not propagated
         log.error("Failed to save component format as DC:Format: %s" %
                   (str(e)))
Example #9
0
 def _handle_err(self, result, item, schedule, action, params):
     """Record a failed action on ``item`` and keep the batch moving.

     Logs the failure, decrements ``self.outstanding``, marks ``action`` as
     failed in ``schedule`` and updates the item statistics for the failed
     action and for every action ``schedule.fail`` reports as cancelled.
     When fewer than ``self.max_outstanding`` actions are outstanding,
     ``_iterate`` is scheduled on the reactor.  The item is saved last.
     ``params`` is accepted but not used here.
     """
     log.error(
         '_handle_err action %s on target_id=%s: %s'
         % (action, item.target_id, str(result))
     )
     self.outstanding -= 1

     dropped = schedule.fail(action)
     # one failure for the action itself ...
     self._update_item_stats(item, action, str(result), 0, 1, 0)
     # ... and one cancellation for each action dropped because of it
     for dropped_action in dropped:
         note = "cancelled on failed %s" % action
         self._update_item_stats(item, dropped_action, note, 0, 0, 1)

     has_capacity = self.outstanding < self.max_outstanding
     if has_capacity:
         reactor.callLater(0, self._iterate)
     item.save()
Example #10
0
 def _cb_xmp_ok(self, features):
     """Gather the lookups needed to store extracted XMP ``features``.

     Resolves the content types of ``self.component`` and ``self.item``,
     compiles the ``prefix:property[index]`` xpath pattern and fetches the
     metadata default language for the user returned by
     ``self.item.uploaded_by()``.  If any of this raises, the error is
     logged, passed to ``self.deferred.errback`` and the method returns.

     NOTE(review): none of the values bound here are used in the visible
     lines, and ``features`` is untouched -- this excerpt presumably
     continues; confirm against the full source.
     """
     try:
         ctype_component = ContentType.objects.get_for_model(self.component)
         ctype = ContentType.objects.get_for_model(self.item)
         # named groups: prefix, property and an optional [n] array index
         xpath = re.compile(r"(?P<prefix>\w+):(?P<property>\w+)(?P<array_index>\[\d+\]){,1}")
         user = self.item.uploaded_by()
         metadata_default_language = get_metadata_default_language(user)
     except Exception, e:
         log.error("Error in %s: %s %s" % (self.__class__.__name__, type(e), str(e)))
         self.deferred.errback(e)
         return
Example #11
0
 def handle_result(self, result, component):
     """Store the produced file name on ``component`` and fire the deferred.

     A truthy ``result`` is treated as a path: its final path element
     becomes ``component.uri`` and the component is saved.  A falsy
     ``result`` is only logged as an error.  In both cases
     ``self.deferred.callback`` is called with ``result``.
     """
     log.debug('handle_result %s' % str(result))
     log.debug("[save_component] component %s" % component.pk)

     if not result:
         log.error('Empty result passed to save_and_extract_features')
     else:
         # keep only the file name; the directory part is not stored
         component.uri = os.path.basename(result)
         component.save()
     self.deferred.callback(result)
Example #12
0
    def handle_result(self, result, component):
        """Store the produced file name on ``component`` and fire the deferred.

        A truthy ``result`` is treated as a path: its final path element
        becomes ``component.uri`` and the component is saved.  A falsy
        ``result`` is only logged as an error.  In both cases
        ``self.deferred.callback`` is called with ``result``.
        """
        log.debug("handle_result %s" % str(result))
        log.debug("[save_component] component %s" % component.pk)

        if not result:
            log.error("Empty result passed to save_and_extract_features")
        else:
            # keep only the file name; the directory part is not stored
            component.uri = os.path.basename(result)
            component.save()
        self.deferred.callback(result)
Example #13
0
 def execute(self, output_variant_name, output_type, **params):     
     """Prepare the command line and the output component for this action.

     Forwards all arguments to ``self.get_cmdline``, looks up the Variant
     named ``output_variant_name``, creates the output component with
     ``self.item.create_variant`` (stored in ``self.out_comp`` with its
     ``source`` set to ``self.source``) and splits ``self.cmdline`` into an
     argument list.  Any exception raised while doing so is logged together
     with the class name and passed to ``self.deferred.errback``.

     NOTE(review): ``args`` is not used in the lines visible here, and the
     method does not return after ``errback`` -- this excerpt presumably
     continues beyond the ``except`` clause; confirm against the full source.
     """
     # get basic data (avoid creating stuff in DB)
     try:
         # presumably get_cmdline() populates self.cmdline and self.out_type,
         # both of which are read below -- verify in the implementation
         self.get_cmdline(output_variant_name, output_type, **params)
         output_variant = Variant.objects.get(name = output_variant_name)
         self.out_comp = self.item.create_variant(output_variant, self.workspace, self.out_type)
         self.out_comp.source = self.source
         args = splitstring(self.cmdline)
     except Exception, e:
         log.error('Error in %s: %s %s' % (self.__class__.__name__, type(e), str(e)))
         self.deferred.errback(e)
Example #14
0
 def _handle_err(self, result, item, schedule, action, params):
     """Record a failed action on ``item`` and keep the batch moving.

     Logs the failure, decrements ``self.outstanding``, marks ``action`` as
     failed in ``schedule`` and updates the item statistics for the failed
     action and for every action ``schedule.fail`` reports as cancelled.
     When fewer than ``self.max_outstanding`` actions are outstanding,
     ``_iterate`` is scheduled on the reactor.  The item is saved last.
     ``params`` is accepted but not used here.
     """
     log.error(
         '_handle_err action %s on target_id=%s: %s'
         % (action, item.target_id, str(result))
     )
     self.outstanding -= 1

     dropped = schedule.fail(action)
     # one failure for the action itself ...
     self._update_item_stats(item, action, str(result), 0, 1, 0)
     # ... and one cancellation for each action dropped because of it
     for dropped_action in dropped:
         note = "cancelled on failed %s" % action
         self._update_item_stats(item, dropped_action, note, 0, 0, 1)

     has_capacity = self.outstanding < self.max_outstanding
     if has_capacity:
         reactor.callLater(0, self._iterate)
     item.save()
Example #15
0
 def execute(self, output_variant_name, output_type, **params):
     """Prepare the command line and the output component for this action.

     Forwards all arguments to ``self.get_cmdline``, looks up the Variant
     named ``output_variant_name``, creates the output component with
     ``self.item.create_variant`` (stored in ``self.out_comp`` with its
     ``source`` set to ``self.source``) and splits ``self.cmdline`` into an
     argument list.  Any exception raised while doing so is logged together
     with the class name and passed to ``self.deferred.errback``.

     NOTE(review): ``args`` is not used in the lines visible here, and the
     method does not return after ``errback`` -- this excerpt presumably
     continues beyond the ``except`` clause; confirm against the full source.
     """
     # get basic data (avoid creating stuff in DB)
     try:
         # presumably get_cmdline() populates self.cmdline and self.out_type,
         # both of which are read below -- verify in the implementation
         self.get_cmdline(output_variant_name, output_type, **params)
         output_variant = Variant.objects.get(name=output_variant_name)
         self.out_comp = self.item.create_variant(output_variant,
                                                  self.workspace,
                                                  self.out_type)
         self.out_comp.source = self.source
         args = splitstring(self.cmdline)
     except Exception, e:
         log.error('Error in %s: %s %s' %
                   (self.__class__.__name__, type(e), str(e)))
         self.deferred.errback(e)
Example #16
0
 def _cb_xmp_ok(self, features):
     """Gather the lookups needed to store extracted XMP ``features``.

     Resolves the content types of ``self.component`` and ``self.item``,
     compiles the ``prefix:property[index]`` xpath pattern and fetches the
     metadata default language for the user returned by
     ``self.item.uploaded_by()``.  If any of this raises, the error is
     logged, passed to ``self.deferred.errback`` and the method returns.

     NOTE(review): none of the values bound here are used in the visible
     lines, and ``features`` is untouched -- this excerpt presumably
     continues; confirm against the full source.
     """
     try:
         ctype_component = ContentType.objects.get_for_model(self.component)
         ctype = ContentType.objects.get_for_model(self.item)
         # named groups: prefix, property and an optional [n] array index
         xpath = re.compile(
             r'(?P<prefix>\w+):(?P<property>\w+)(?P<array_index>\[\d+\]){,1}'
         )
         user = self.item.uploaded_by()
         metadata_default_language = get_metadata_default_language(user)
     except Exception, e:
         log.error('Error in %s: %s %s' %
                   (self.__class__.__name__, type(e), str(e)))
         self.deferred.errback(e)
         return
Example #17
0
    def _read_xmp_features(self, features):
        """Turn extracted XMP features into unsaved ``MetadataValue`` objects.

        ``features`` is expected to be a dict keyed by XMP namespace URI;
        each value is a sequence of entries indexed as ``[0]`` xpath,
        ``[1]`` value and ``[2]`` options (a mapping with an
        ``'IS_QUALIFIER'`` key).  Namespaces that cannot be loaded are
        logged and skipped.  Entries whose property is unknown in the
        namespace, or whose value is blank, are ignored.

        An ``xml:lang`` qualifier entry does not create a value: it sets the
        ``language`` of the value already created for the qualified xpath
        ('x-default' is replaced by the uploader's default language).  Every
        other entry creates a ``MetadataValue`` attached to the component
        when the property ``is_variant``, otherwise to the item.

        Returns ``(metadata_list, delete_list)``: the created values, and
        the properties flagged ``'not_array'`` for which a value was found.
        When ``features`` is not a dict, the item's state is set to 1, the
        item is saved and ``([], [])`` is returned.
        """
        xpath = re.compile(r"(?P<prefix>\w+):(?P<property>\w+)(?P<array_index>\[\d+\]){,1}")
        ctype = ContentType.objects.get_for_model(self.item)
        ctype_component = ContentType.objects.get_for_model(self.component)

        user = self.item.uploaded_by()
        metadata_default_language = get_metadata_default_language(user)

        metadata_dict = {}
        metadata_list = []
        delete_list = []

        log.debug("READ XMP FEATURES")

        if not isinstance(features, dict):
            # Was a bare ``item``, which is not defined in this scope and
            # raised NameError instead of flagging the item.
            self.item.state = 1
            self.item.save()
            return [], []

        for feature in features.keys():
            try:
                namespace_obj = XMPNamespace.objects.get(uri=feature)
            except Exception as e:
                log.error("#######  Error: unknown namespace %s: %s" % (feature, str(e)))
                continue

            metadata_dict[namespace_obj] = {}

            namespace_properties = MetadataProperty.objects.filter(namespace=namespace_obj)
            for property_values in features[feature]:
                property_xpath = property_values[0]
                property_value = property_values[1]
                property_options = property_values[2]
                xpath_splitted = xpath.findall(property_xpath)
                metadata_property = xpath_splitted[0][1].strip()
                found_property = namespace_properties.filter(field_name__iexact=metadata_property)
                if not (found_property.count() > 0 and len(property_value.strip()) > 0):
                    continue

                # Fetch the matching property once instead of re-indexing
                # the queryset for every use below.
                schema = found_property[0]
                if schema.is_array == "not_array":
                    delete_list.append(schema)
                if property_options["IS_QUALIFIER"] and xpath_splitted[-1][1] == "lang":
                    # Language qualifier: attach it to the value created
                    # earlier for the same xpath without the qualifier.
                    find_xpath = property_xpath.replace("/?xml:lang", "")
                    if find_xpath in metadata_dict[namespace_obj]:
                        if property_value == "x-default":
                            property_value = metadata_default_language
                        metadata_dict[namespace_obj][find_xpath].language = property_value
                    else:
                        log.debug("metadata property not found: " + find_xpath)
                else:
                    if schema.is_variant:
                        x = MetadataValue(
                            schema=schema,
                            object_id=self.component.pk,
                            content_type=ctype_component,
                            value=property_value,
                            xpath=property_xpath,
                        )
                    else:
                        x = MetadataValue(
                            schema=schema,
                            object_id=self.item.pk,
                            content_type=ctype,
                            value=property_value,
                            xpath=property_xpath,
                        )
                    metadata_dict[namespace_obj][property_xpath] = x
                    metadata_list.append(x)

        # Same shape as the early return above; without it the method
        # returned None on the normal path.
        return metadata_list, delete_list
Example #18
0
    def _cb_xmp_ok(self, features):
        try:
            ctype_component = ContentType.objects.get_for_model(self.component)
            ctype = ContentType.objects.get_for_model(self.item)
            xpath = re.compile(r"(?P<prefix>\w+):(?P<property>\w+)(?P<array_index>\[\d+\]){,1}")
            user = self.item.uploaded_by()
            metadata_default_language = get_metadata_default_language(user)
        except Exception, e:
            log.error("Error in %s: %s %s" % (self.__class__.__name__, type(e), str(e)))
            self.deferred.errback(e)
            return

        try:
            save_type(ctype, self.component)
        except Exception, e:
            log.error("Failed to save component format as DC:Format: %s" % (str(e)))
            self.deferred.errback(e)
            return

        try:
            xmp_metadata_list, xmp_delete_list = self._read_xmp_features(features)
            MetadataValue.objects.filter(
                schema__in=xmp_delete_list, object_id=self.component.pk, content_type=ctype_component
            ).delete()

            latitude = None
            longitude = None
            for x in xmp_metadata_list:
                if x.xpath == "exif:GPSLatitude":
                    latitude = x.value
                elif x.xpath == "exif:GPSLongitude":
Example #19
0
    def _read_xmp_features(self, features):
        """Turn extracted XMP features into unsaved ``MetadataValue`` objects.

        ``features`` is expected to be a dict keyed by XMP namespace URI;
        each value is a sequence of entries indexed as ``[0]`` xpath,
        ``[1]`` value and ``[2]`` options (a mapping with an
        ``'IS_QUALIFIER'`` key).  Namespaces that cannot be loaded are
        logged and skipped.  Entries whose property is unknown in the
        namespace, or whose value is blank, are ignored.

        An ``xml:lang`` qualifier entry does not create a value: it sets the
        ``language`` of the value already created for the qualified xpath
        ('x-default' is replaced by the uploader's default language).  Every
        other entry creates a ``MetadataValue`` attached to the component
        when the property ``is_variant``, otherwise to the item.

        Returns ``(metadata_list, delete_list)``: the created values, and
        the properties flagged ``'not_array'`` for which a value was found.
        When ``features`` is not a dict, the item's state is set to 1, the
        item is saved and ``([], [])`` is returned.
        """
        xpath = re.compile(
            r'(?P<prefix>\w+):(?P<property>\w+)(?P<array_index>\[\d+\]){,1}')
        ctype = ContentType.objects.get_for_model(self.item)
        ctype_component = ContentType.objects.get_for_model(self.component)

        user = self.item.uploaded_by()
        metadata_default_language = get_metadata_default_language(user)

        metadata_dict = {}
        metadata_list = []
        delete_list = []

        log.debug('READ XMP FEATURES')

        if not isinstance(features, dict):
            # Was a bare ``item``, which is not defined in this scope and
            # raised NameError instead of flagging the item.
            self.item.state = 1
            self.item.save()
            return [], []

        for feature in features.keys():
            try:
                namespace_obj = XMPNamespace.objects.get(uri=feature)
            except Exception as e:
                log.error('#######  Error: unknown namespace %s: %s' %
                          (feature, str(e)))
                continue

            metadata_dict[namespace_obj] = {}

            namespace_properties = MetadataProperty.objects.filter(
                namespace=namespace_obj)
            for property_values in features[feature]:
                property_xpath = property_values[0]
                property_value = property_values[1]
                property_options = property_values[2]
                xpath_splitted = xpath.findall(property_xpath)
                metadata_property = xpath_splitted[0][1].strip()
                found_property = namespace_properties.filter(
                    field_name__iexact=metadata_property)
                if not (found_property.count() > 0
                        and len(property_value.strip()) > 0):
                    continue

                # Fetch the matching property once instead of re-indexing
                # the queryset for every use below.
                schema = found_property[0]
                if schema.is_array == 'not_array':
                    delete_list.append(schema)
                if (property_options['IS_QUALIFIER']
                        and xpath_splitted[-1][1] == 'lang'):
                    # Language qualifier: attach it to the value created
                    # earlier for the same xpath without the qualifier.
                    find_xpath = property_xpath.replace('/?xml:lang', '')
                    if find_xpath in metadata_dict[namespace_obj]:
                        if property_value == 'x-default':
                            property_value = metadata_default_language
                        metadata_dict[namespace_obj][
                            find_xpath].language = property_value
                    else:
                        log.debug('metadata property not found: ' +
                                  find_xpath)
                else:
                    if schema.is_variant:
                        x = MetadataValue(schema=schema,
                                          object_id=self.component.pk,
                                          content_type=ctype_component,
                                          value=property_value,
                                          xpath=property_xpath)
                    else:
                        x = MetadataValue(schema=schema,
                                          object_id=self.item.pk,
                                          content_type=ctype,
                                          value=property_value,
                                          xpath=property_xpath)
                    metadata_dict[namespace_obj][property_xpath] = x
                    metadata_list.append(x)

        # Same shape as the early return above; without it the method
        # returned None on the normal path.
        return metadata_list, delete_list
Example #20
0
            ctype = ContentType.objects.get_for_model(self.item)
            xpath = re.compile(
                r'(?P<prefix>\w+):(?P<property>\w+)(?P<array_index>\[\d+\]){,1}'
            )
            user = self.item.uploaded_by()
            metadata_default_language = get_metadata_default_language(user)
        except Exception, e:
            log.error('Error in %s: %s %s' %
                      (self.__class__.__name__, type(e), str(e)))
            self.deferred.errback(e)
            return

        try:
            save_type(ctype, self.component)
        except Exception, e:
            log.error("Failed to save component format as DC:Format: %s" %
                      (str(e)))
            self.deferred.errback(e)
            return

        try:
            xmp_metadata_list, xmp_delete_list = self._read_xmp_features(
                features)
            MetadataValue.objects.filter(
                schema__in=xmp_delete_list,
                object_id=self.component.pk,
                content_type=ctype_component).delete()

            latitude = None
            longitude = None
            for x in xmp_metadata_list:
                if x.xpath == 'exif:GPSLatitude':
Example #21
0
class ExtractBasic(Analyzer):
    """Analyzer that extracts basic features from a source component.

    The extractor type reported by ``self.source.get_extractor()`` selects
    the executable (``exe_list``), the command-line template
    (``cmd_<type>``) and the output parser (``parse_<type>``).  Each parser
    builds a ``features`` dict and hands it to ``_save_features``, which
    serialises it into ``self.source._features``.
    """
    md_server = "LowLoad"
    # extractor type -> executable name
    exe_list = {
        'image_basic': 'identify',
        'media_basic': 'mediainfo',
        'doc_basic': 'pdfinfo'
    }
    # command-line templates, one per extractor type; %(infile)s is replaced
    # with the source uri in get_cmdline()
    cmd_image_basic = '"file://%(infile)s"'
    cmd_media_basic = '-f "--Output=XML" "file://%(infile)s"'
    cmd_doc_basic = '"file://%(infile)s"'
    # pattern applied to the image extractor output by parse_image_basic():
    # filename[frame] type WxH WxH+X+Y depth-unit
    regex  = r'(?P<filename>[^ \[]*)(?P<frame>\[\d+\]){0,1}\s(?P<type>[\w._-]+)\s' \
             r'(?P<width>\d+)x(?P<height>\d+)\s' \
             r'(?P<wcrop>\d+)x(?P<hcrop>\d+)[\+-](?P<wcropoff>\d+)[\+-](?P<hcropoff>\d+)\s' \
             r'(?P<depth>\d+)-(?P<depth_unit>\w+)\s'
    # compiled form of ``regex``; built on first use in parse_image_basic()
    RE = None

    def get_cmdline(self):
        """Select executable, command line and parser for the source."""
        extractor_type = self.source.get_extractor()
        log.debug('######333\n########## ExtractBasic: using %s' %
                  extractor_type)
        self.remote_exe = self.exe_list[extractor_type]
        self.cmdline = getattr(self, 'cmd_%s' % extractor_type)
        self.parser = getattr(self, 'parse_%s' % extractor_type)
        self.cmdline = self.cmdline % {'infile': self.source.uri}

    def parse_stdout(self, result):
        """Dispatch the command output to the parser chosen in get_cmdline."""
        return self.parser(result, self.source.uri)

    def parse_image_basic(self, result, filename):
        """Parse the image extractor output and save the features.

        Raises ``Exception`` when ``result`` does not match ``regex``.
        Returns 'ok'.
        """
        features = {}
        if not self.RE:
            self.RE = re.compile(self.regex)
        fullpath = str(self._fc.abspath(filename))
        #log.debug('image_basic_extractor: entering')
        m = self.RE.match(result)
        if not m:
            raise Exception('Unable to parse script output')
        d = m.groupdict()
        size = os.stat(fullpath).st_size
        features['size'] = size
        features['width'] = d['width']
        features['height'] = d['height']
        features['codec'] = d['type'].lower()
        features['has_frame'] = d['frame'] is not None
        features['has_sound'] = False
        features['depth'] = d['depth']  # numeric part of the depth field
        features['depth_unit'] = d['depth_unit']  # unit following the depth
        self._save_features(features, 'image_basic')
        return 'ok'

    def parse_media_basic(self, result, filename):
        """Parse the XML media extractor output and save the features.

        Returns 'ok'.
        """
        log.debug('parse_media_basic: entering "%s"' % type(result))
        # NOTE(review): fullpath is not used below; the call is kept in case
        # abspath() is relied on to validate the path -- confirm and remove.
        fullpath = str(self._fc.abspath(filename))
        parser = Parser()
        parseString(result.encode('utf-8'), parser)
        features = parser.parsed
        self._save_features(features, 'media_basic')
        return 'ok'

    def parse_doc_basic(self, result, filename):
        """Parse ``key: value`` lines of the document extractor output.

        Lines without a colon are skipped.  ``size`` is the first
        whitespace-separated token of the 'File size' entry (-1 when that
        entry is missing) and ``pages`` is copied from 'Pages'; a missing
        'Pages' entry raises ``KeyError``.  Returns 'ok'.
        """
        log.debug('parse_doc_basic: entering')
        features = {}
        for line in result.split('\n'):
            key, sep, value = line.partition(':')
            if not sep:
                continue
            features[key.strip()] = value.strip()
        features['size'] = long(features.get('File size', '-1').split()[0])
        features['pages'] = features['Pages']
        self._save_features(features, 'doc_basic')
        return 'ok'

    def _save_features(self, features, extractor_type):
        """Save results of basic extractions.

        Stores the component format through ``save_type`` and then the
        serialised ``features`` in ``self.source._features``.  Both steps
        are best effort: failures are logged, not raised.
        ``extractor_type`` is accepted but not used here.
        """
        #log.debug('ExtractBasicFeatures._save_features: %s' % features)
        ctype = ContentType.objects.get_for_model(self.source)
        try:
            save_type(ctype, self.source)
        except Exception as e:
            log.error("Failed to save component format as DC:Format: %s" %
                      (str(e)))
        try:
            self.source._features = dumps(features)
            self.source.save()
            # was a bare ``print 'saved'``; routed through the logger like
            # every other message in this class
            log.debug('saved')
        except Exception as e:
            log.error("Failed to save features in component object: %s" %
                      str(e))