コード例 #1
0
ファイル: extract_basic.py プロジェクト: relic7/nd1404
 def get_cmdline(self):
     """Pick the extractor for the source and prepare its command line.

     Asks ``self.source`` for its extractor type and stores on the instance:
     the matching ``exe_list`` entry as ``remote_exe``, the ``parse_<type>``
     attribute as ``parser``, and the ``cmd_<type>`` template expanded with
     the source URI as ``cmdline``.
     """
     kind = self.source.get_extractor()
     log.debug('######333\n########## ExtractBasic: using %s' % kind)
     template_attr = 'cmd_%s' % kind
     parser_attr = 'parse_%s' % kind

     self.remote_exe = self.exe_list[kind]
     # cmdline briefly holds the unexpanded template; it is expanded last,
     # after the parser has been resolved.
     self.cmdline = getattr(self, template_attr)
     self.parser = getattr(self, parser_attr)
     substitutions = {'infile': self.source.uri}
     self.cmdline = self.cmdline % substitutions
コード例 #2
0
ファイル: extract_basic.py プロジェクト: relic7/nd1404
 def parse_media_basic(self, result, filename):
     """Parse extractor output for a media file and store its features.

     ``result`` is encoded to UTF-8 and run through ``parseString`` with a
     fresh ``Parser``; whatever the parser exposes as ``parsed`` is handed
     to ``_save_features`` under the ``'media_basic'`` label.

     Returns the string ``'ok'``.
     """
     log.debug('parse_media_basic: entering "%s"' % type(result))
     # The resolved path is not used further down.
     # NOTE(review): confirm abspath() has no required side effect before
     # removing this lookup.
     _resolved_path = str(self._fc.abspath(filename))
     feature_parser = Parser()
     encoded = result.encode('utf-8')
     parseString(encoded, feature_parser)
     self._save_features(feature_parser.parsed, 'media_basic')
     return 'ok'
コード例 #3
0
ファイル: extract_xmp.py プロジェクト: relic7/nd1404
    def _read_xmp_features(self, features):
        """Build unsaved MetadataValue objects from extracted XMP features.

        ``features`` is expected to be a dict keyed by namespace URI.  Each
        value is iterated as entries indexed ``[0]`` (property xpath),
        ``[1]`` (property value) and ``[2]`` (options, read through the
        ``'IS_QUALIFIER'`` key).

        For every entry whose property name is known for the namespace and
        whose value is not blank:

        * properties whose ``is_array`` equals ``'not_array'`` are appended
          to the delete list;
        * an ``xml:lang`` qualifier entry sets the ``language`` of the value
          created earlier for the same xpath (``'x-default'`` is replaced by
          the uploader's default metadata language);
        * any other entry creates a MetadataValue attached to the component
          when the property ``is_variant``, otherwise to the item.

        Namespaces that cannot be looked up are logged and skipped.

        If ``features`` is not a dict, the item is put in state 1, saved,
        and ``([], [])`` is returned.

        NOTE(review): this listing ends right after the loop, so the normal
        path falls through without an explicit return while the early exit
        returns a pair of lists.  Presumably the complete source returns
        ``(metadata_list, delete_list)`` -- confirm against the full file.
        """
        xpath = re.compile(r'(?P<prefix>\w+):(?P<property>\w+)(?P<array_index>\[\d+\]){,1}')
        ctype = ContentType.objects.get_for_model(self.item)
        ctype_component = ContentType.objects.get_for_model(self.component)

        user = self.item.uploaded_by()
        metadata_default_language = get_metadata_default_language(user)

        metadata_dict = {}
        metadata_list = []
        delete_list = []

        log.debug('READ XMP FEATURES')

        if not isinstance(features, dict):
            # The bare name ``item`` is not defined in this method; the item
            # being processed lives on the instance.
            self.item.state = 1
            self.item.save()
            return [], []

        for feature in features:
            try:
                namespace_obj = XMPNamespace.objects.get(uri=feature)
            except Exception as e:
                log.error('#######  Error: unknown namespace %s: %s' % (feature, str(e)))
                continue

            values_by_xpath = {}
            metadata_dict[namespace_obj] = values_by_xpath

            namespace_properties = MetadataProperty.objects.filter(namespace=namespace_obj)
            for property_values in features[feature]:
                property_xpath = property_values[0]
                property_value = property_values[1]
                property_options = property_values[2]
                xpath_splitted = xpath.findall(property_xpath)
                metadata_property = xpath_splitted[0][1].strip()
                found_property = namespace_properties.filter(field_name__iexact=metadata_property)
                if not (found_property.count() > 0 and property_value.strip()):
                    continue

                # Fetch the matching property once instead of re-indexing the
                # queryset for every use.
                schema = found_property[0]
                if schema.is_array == 'not_array':
                    delete_list.append(schema)

                if property_options['IS_QUALIFIER'] and xpath_splitted[-1][1] == 'lang':
                    # A language qualifier refers to the value stored under
                    # the same xpath without the qualifier suffix.
                    find_xpath = property_xpath.replace('/?xml:lang', '')
                    if find_xpath in values_by_xpath:
                        if property_value == 'x-default':
                            property_value = metadata_default_language
                        values_by_xpath[find_xpath].language = property_value
                    else:
                        log.debug('metadata property not found: ' + find_xpath)
                else:
                    if schema.is_variant:
                        x = MetadataValue(schema=schema, object_id=self.component.pk,
                                          content_type=ctype_component,
                                          value=property_value, xpath=property_xpath)
                    else:
                        x = MetadataValue(schema=schema, object_id=self.item.pk,
                                          content_type=ctype,
                                          value=property_value, xpath=property_xpath)
                    values_by_xpath[property_xpath] = x
                    metadata_list.append(x)
コード例 #4
0
ファイル: extract_basic.py プロジェクト: relic7/nd1404
 def parse_doc_basic(self, result, filename):
     """Parse ``key: value`` extractor output for a document and save it.

     Every line of ``result`` that contains a colon is split at the first
     colon into a stripped key and a stripped value; a repeated key keeps
     the last value seen.  Two derived entries are then added:

     * ``size``: the first whitespace-separated token of ``File size`` as
       an integer, or -1 when that entry is missing or blank;
     * ``pages``: a copy of the ``Pages`` entry.

     The resulting dict is passed to ``_save_features`` under the
     ``'doc_basic'`` label.  ``filename`` is accepted but not used here.

     Returns the string ``'ok'``.

     Raises KeyError when there is no ``Pages`` line, and ValueError when
     the first ``File size`` token is not an integer.
     """
     log.debug('parse_doc_basic: entering')
     features = {}
     for line in result.split('\n'):
         key, colon, value = line.partition(':')
         if not colon:
             continue
         features[key.strip()] = value.strip()

     # A present-but-blank "File size:" line yields no tokens; fall back to
     # -1 exactly as for a missing line instead of failing on [0].
     size_tokens = features.get('File size', '').split()
     features['size'] = long(size_tokens[0]) if size_tokens else long(-1)
     features['pages'] = features['Pages']
     self._save_features(features, 'doc_basic')
     return 'ok'
コード例 #5
0
ファイル: adapter.py プロジェクト: relic7/nd1404
    def handle_result(self, result):
        """Record the produced file on the output component and touch the item.

        Logs the result, stores the final path component of ``self.out_file``
        as the output component's ``uri`` and saves it, then stamps the
        item's ``update_time`` with the current time and saves the item.

        Returns ``self.out_file``.
        """
        log.debug('handle_result %s' % str(result))
        log.debug("[save_component] component %s" % self.out_comp.pk)
        log.debug('##############\n%s\n' % result['data'])

        # Only the file name is stored; the directory part is dropped.
        self.out_comp.uri = os.path.basename(self.out_file)
        self.out_comp.save()

        self.item.update_time = time.time()
        self.item.save()

        return self.out_file
コード例 #6
0
ファイル: extract_xmp.py プロジェクト: relic7/nd1404
            longitude = None
            for x in xmp_metadata_list:
                if x.xpath == 'exif:GPSLatitude':
                    latitude = x.value
                elif x.xpath == 'exif:GPSLongitude':
                    longitude = x.value
                x.save()
        except Exception, e:
            log.error('Error in %s: %s %s' % (self.__class__.__name__, type(e), str(e)))
            return

        if latitude != None and longitude != None:
            try:
                GeoInfo.objects.save_geo_coords(self.component.item, latitude,longitude)
            except Exception, ex:
                log.debug( 'ex while saving latitude and longitude in dam db: %s'% ex)

    def extract_xmp(self):
        """Run the XMP extractor on the component and process its output.

        Submits ``xmp_extractor.extract`` for the component's URI, waits for
        the result and passes it to ``_cb_xmp_ok``.
        """
        pending = xmp_extractor.extract.delay(self.component.uri)
        extracted = pending.get()
        self._cb_xmp_ok(extracted)

def test():
    """Manual smoke test: look up item 1 and workspace 1, then call ``run``.

    Prints a marker before the lookups and another after ``run`` returns.
    """
    print('test')
    # Both lookups use primary key 1; the item itself is not used afterwards.
    _item = Item.objects.get(pk=1)
    target_workspace = DAMWorkspace.objects.get(pk=1)

    _outcome = run(4, target_workspace.pk, source_variant_name=['original'])
    print('addBoth')
コード例 #7
0
ファイル: extract_basic.py プロジェクト: relic7/nd1404
    def _save_metadata(self, features, ctype):
        """Translate extracted basic features into unsaved MetadataValue objects.

        ``features`` maps feature names to values; entries equal to ``''``
        or ``'0'`` are ignored.  The ``file_size``, ``height`` and ``width``
        features are also copied onto the source component (``size``,
        ``height``, ``width``), which is saved before returning.

        Each remaining feature is looked up in a per-media-type table of XMP
        targets, given as ``(prefix, field)`` or ``(prefix, field,
        struct_prefix, struct_field)`` tuples.  For every target whose
        MetadataProperty exists, and which is either a variant property or
        belongs to the ``original`` variant, a MetadataValue is created with
        ``ctype`` as its content type and the component's pk as object id.
        Four-part targets get an xpath of the form
        ``prefix:field[1]/struct_prefix:struct_field``; others get ``''``.
        Properties of type ``'lang'`` also receive the uploader's default
        metadata language.

        Returns ``(metadata_list, delete_list)``: the created values and the
        properties they were created for.
        """
        c = self.source

        xmp_metadata_commons = {'size': [('notreDAM', 'FileSize')]}
        xmp_metadata_audio = {
            'channels': [('xmpDM', 'audioChannelType')],
            'sample_rate': [('xmpDM', 'audioSampleRate')],
            'duration': [('notreDAM', 'Duration')],
        }
        xmp_metadata_video = {
            'height': [('xmpDM', 'videoFrameSize', 'stDim', 'h')],
            'width': [('xmpDM', 'videoFrameSize', 'stDim', 'w')],
            'r_frame_rate': [('xmpDM', 'videoFrameRate')],
            'bit_rate': [('xmpDM', 'fileDataRate')],
            'duration': [('notreDAM', 'Duration')],
        }
        xmp_metadata_image = {
            'height': [('tiff', 'ImageLength')],
            'width': [('tiff', 'ImageWidth')],
        }
        xmp_metadata_doc = {
            'pages': [('notreDAM', 'NPages')],
            'Copyright': [('dc', 'rights')],
        }
        xmp_metadata_image.update(xmp_metadata_commons)
        xmp_metadata_audio.update(xmp_metadata_commons)
        xmp_metadata_doc.update(xmp_metadata_commons)

        # Video inherits every audio mapping (which by now includes 'size').
        xmp_metadata_video.update(xmp_metadata_audio)
        xmp_metadata = {
            'image': xmp_metadata_image,
            'video': xmp_metadata_video,
            'audio': xmp_metadata_audio,
            'doc': xmp_metadata_doc,
        }

        metadata_list = []
        delete_list = []

        media_type = c.media_type.name
        user = self.item.uploaded_by()
        metadata_default_language = get_metadata_default_language(user)

        for feature, value in features.items():
            if value == '' or value == '0':
                continue
            if feature == 'file_size':
                c.size = value
            elif feature == 'height':
                c.height = value
            elif feature == 'width':
                c.width = value

            try:
                xmp_names = xmp_metadata[media_type][feature]
            except KeyError:
                # Unknown media type or a feature with no XMP mapping.
                continue

            for m in xmp_names:
                try:
                    ms = MetadataProperty.objects.get(namespace__prefix=m[0], field_name=m[1])
                except Exception:
                    # Best effort: a missing or ambiguous property is skipped,
                    # but interpreter-exit exceptions are no longer swallowed.
                    log.debug( 'inside readfeatures, unknown metadata %s:%s ' %  (m[0],m[1]))
                    continue
                if not (ms.is_variant or c.variant.name == 'original'):
                    continue

                if len(m) == 4:
                    property_xpath = "%s:%s[1]/%s:%s" % (m[0], m[1], m[2], m[3])
                else:
                    property_xpath = ''
                try:
                    if ms.type == 'lang':
                        x = MetadataValue(schema=ms, object_id=c.pk, content_type=ctype,
                                          value=value, language=metadata_default_language,
                                          xpath=property_xpath)
                    else:
                        x = MetadataValue(schema=ms, object_id=c.pk, content_type=ctype,
                                          value=value, xpath=property_xpath)
                    metadata_list.append(x)
                    delete_list.append(ms)
                except Exception:
                    log.debug('inside readfeatures, could not get %s' %  ms)
                    continue
        c.save()
        return (metadata_list, delete_list)
コード例 #8
0
ファイル: analyzer.py プロジェクト: relic7/nd1404
        if self.get_cmdline is None:
            raise Exception('Analyzer is an abstract base class: instantiate a derived class')
        self._fc = Storage()
        self.workspace = workspace
        self.item, self.source = get_source_rendition(item_id, source_variant_name, workspace)

    def handle_result(self, result, *args):
        """Parse the ``'data'`` entry of a command result.

        Passes ``result['data']`` and any extra positional arguments to
        ``self.parse_stdout`` and returns whatever it returns.

        Any exception raised on the way is logged with the class name and
        then re-raised unchanged.
        """
        try:
            return self.parse_stdout(result['data'], *args)
        except Exception as e:
            log.error('Error in %s: %s %s' % (self.__class__.__name__, type(e), str(e)))
            raise

    def execute(self, **params):
        """Build the command line and run it, unless in fake mode.

        ``params`` are forwarded to ``get_cmdline``; the resulting
        ``self.cmdline`` is split into an argument list.  A failure in either
        step is logged with the class name and re-raised.

        When ``self.fake`` is true the argument list is only logged and
        None is returned.  Otherwise the command is submitted through
        ``generic_cmd.call`` with ``self.remote_exe``, the arguments and
        ``self.env``, its result is awaited, and the value returned by
        ``handle_result`` for it is returned.
        """
        # get basic data (avoid creating stuff in DB)
        try:
            self.get_cmdline(**params)
            args = splitstring(self.cmdline)
        except Exception as e:
            log.error('Error in %s: %s %s' % (self.__class__.__name__, type(e), str(e)))
            raise

        if self.fake:
            log.debug('######### Command line:\n%s' % str(args))
            return None

        result = generic_cmd.call.delay(self.remote_exe,
                                        args, self.env).get()
        return self.handle_result(result)