def get_cmdline(self):
    """Select the extractor for the source and build its command line.

    The extractor type reported by ``self.source.get_extractor()`` is used
    to pick the remote executable from ``self.exe_list`` and to look up the
    ``cmd_<type>`` template and ``parse_<type>`` parser attributes on this
    object.  The template is then filled in with the source URI (under the
    ``infile`` key) and stored in ``self.cmdline``.
    """
    kind = self.source.get_extractor()
    log.debug('######333\n########## ExtractBasic: using %s' % kind)

    self.remote_exe = self.exe_list[kind]
    # self.cmdline temporarily holds the raw template; it is expanded below
    # once the matching parser has been resolved as well.
    self.cmdline = getattr(self, 'cmd_%s' % kind)
    self.parser = getattr(self, 'parse_%s' % kind)

    substitutions = {'infile': self.source.uri}
    self.cmdline = self.cmdline % substitutions
def parse_media_basic(self, result, filename):
    """Parse extractor output and save it as the 'media_basic' features.

    ``result`` is encoded as UTF-8 and handed to ``parseString`` together
    with a fresh ``Parser`` instance; whatever the parser exposes as
    ``parsed`` is passed to ``self._save_features``.

    Returns the string ``'ok'``.
    """
    log.debug('parse_media_basic: entering "%s"' % type(result))

    # The absolute path is resolved but not used further in this method;
    # the lookup is kept so the storage call still happens.
    fullpath = str(self._fc.abspath(filename))

    handler = Parser()
    encoded = result.encode('utf-8')
    parseString(encoded, handler)

    self._save_features(handler.parsed, 'media_basic')
    return 'ok'
def _read_xmp_features(self, features):
    """Build MetadataValue objects from extracted XMP features.

    ``features`` is expected to be a dict keyed by namespace URI.  Each
    value is iterated as a sequence of entries indexed as
    ``(xpath, value, options)``, where ``options['IS_QUALIFIER']`` is read.

    Entries are skipped when the namespace is unknown, when no property of
    that namespace matches the xpath's property name, or when the value is
    blank.  An ``xml:lang`` qualifier entry does not create a new value: it
    sets ``language`` on the value already created for the xpath it
    qualifies (``'x-default'`` is replaced by the uploader's default
    metadata language).

    Returns:
        A ``(metadata_list, delete_list)`` tuple: the newly constructed
        MetadataValue objects (not saved here) and the matched properties
        whose ``is_array`` is ``'not_array'``.  If ``features`` is not a
        dict, the item's ``state`` is set to 1, the item is saved and two
        empty lists are returned.
    """
    xpath = re.compile(r'(?P<prefix>\w+):(?P<property>\w+)(?P<array_index>\[\d+\]){,1}')
    ctype = ContentType.objects.get_for_model(self.item)
    ctype_component = ContentType.objects.get_for_model(self.component)
    user = self.item.uploaded_by()
    metadata_default_language = get_metadata_default_language(user)
    metadata_dict = {}
    metadata_list = []
    delete_list = []
    log.debug('READ XMP FEATURES')

    if not isinstance(features, dict):
        # Fix: this branch used a bare ``item`` name, which is not defined
        # in this scope and raised NameError instead of flagging the item.
        self.item.state = 1
        self.item.save()
        return [], []

    for feature in features:
        try:
            namespace_obj = XMPNamespace.objects.get(uri=feature)
        except Exception as e:
            log.error('####### Error: unknown namespace %s: %s' % (feature, str(e)))
            continue

        # Values created for this namespace, keyed by xpath, so that a later
        # xml:lang qualifier can find the value it belongs to.
        values_by_xpath = {}
        metadata_dict[namespace_obj] = values_by_xpath
        namespace_properties = MetadataProperty.objects.filter(namespace=namespace_obj)

        for property_values in features[feature]:
            property_xpath = property_values[0]
            property_value = property_values[1]
            property_options = property_values[2]

            xpath_splitted = xpath.findall(property_xpath)
            metadata_property = xpath_splitted[0][1].strip()
            found_property = namespace_properties.filter(field_name__iexact=metadata_property)
            if found_property.count() == 0 or not property_value.strip():
                continue

            # Fetch the matching property once instead of re-indexing the
            # queryset for every attribute access.
            schema = found_property[0]
            if schema.is_array == 'not_array':
                delete_list.append(schema)

            if property_options['IS_QUALIFIER'] and xpath_splitted[-1][1] == 'lang':
                find_xpath = property_xpath.replace('/?xml:lang', '')
                if find_xpath in values_by_xpath:
                    if property_value == 'x-default':
                        property_value = metadata_default_language
                    values_by_xpath[find_xpath].language = property_value
                else:
                    log.debug('metadata property not found: ' + find_xpath)
            else:
                # Variant properties attach to the component, everything
                # else to the item.
                if schema.is_variant:
                    x = MetadataValue(schema=schema, object_id=self.component.pk,
                                      content_type=ctype_component,
                                      value=property_value, xpath=property_xpath)
                else:
                    x = MetadataValue(schema=schema, object_id=self.item.pk,
                                      content_type=ctype,
                                      value=property_value, xpath=property_xpath)
                values_by_xpath[property_xpath] = x
                metadata_list.append(x)

    # Fix: the collected lists were never returned on the normal path, even
    # though the early exit above returns a 2-tuple of lists.
    return metadata_list, delete_list
def parse_doc_basic(self, result, filename):
    """Parse ``key: value`` lines from ``result`` and save them as 'doc_basic'.

    Every line containing a colon is split at its first colon; the stripped
    left part becomes the key and the stripped right part the value.  Lines
    without a colon are ignored.  Two derived entries are then added:
    ``'size'`` (first whitespace-separated token of ``'File size'``, or -1
    when that key is absent) and ``'pages'`` (copy of ``'Pages'``).

    Returns the string ``'ok'``.  Raises KeyError if no ``'Pages'`` line
    was present.
    """
    log.debug('parse_doc_basic: entering')

    features = {}
    for row in result.split('\n'):
        name, colon, rest = row.partition(':')
        if not colon:
            continue
        features[name.strip()] = rest.strip()

    size_field = features.get('File size', '-1')
    features['size'] = long(size_field.split()[0])
    features['pages'] = features['Pages']

    self._save_features(features, 'doc_basic')
    return 'ok'
def handle_result(self, result):
    """Record the produced output file on the output component and item.

    Logs the result, stores the base name of ``self.out_file`` as the
    output component's ``uri`` and saves it, then stamps the item's
    ``update_time`` with the current time and saves the item.

    Returns ``self.out_file``.
    """
    log.debug('handle_result %s' % str(result))
    log.debug("[save_component] component %s" % self.out_comp.pk)
    log.debug('##############\n%s\n' % result['data'])

    # Only the file name (not its directory) is stored as the URI.
    component = self.out_comp
    component.uri = os.path.basename(self.out_file)
    component.save()

    item = self.item
    item.update_time = time.time()
    item.save()

    return self.out_file
longitude = None for x in xmp_metadata_list: if x.xpath == 'exif:GPSLatitude': latitude = x.value elif x.xpath == 'exif:GPSLongitude': longitude = x.value x.save() except Exception, e: log.error('Error in %s: %s %s' % (self.__class__.__name__, type(e), str(e))) return if latitude != None and longitude != None: try: GeoInfo.objects.save_geo_coords(self.component.item, latitude,longitude) except Exception, ex: log.debug( 'ex while saving latitude and longitude in dam db: %s'% ex) def extract_xmp(self): features = xmp_extractor.extract.delay(self.component.uri).get() self._cb_xmp_ok(features) def test(): print 'test' item = Item.objects.get(pk=1) workspace = DAMWorkspace.objects.get(pk = 1) d = run(4, workspace.pk, source_variant_name = ['original'], ) print 'addBoth'
def _save_metadata(self, features, ctype):
    """Translate extracted features into MetadataValue objects for the source.

    ``features`` maps feature names to values; entries whose value is ``''``
    or ``'0'`` are ignored.  The ``file_size``, ``height`` and ``width``
    features are also copied onto the source component, which is saved at
    the end.  For every feature listed in the table for the component's
    media type, the matching MetadataProperty is looked up and, if the
    property is a variant property or the component's variant is named
    ``'original'``, a MetadataValue is constructed for it.

    Args:
        features: dict of feature name -> value.
        ctype: content type passed to every MetadataValue created here.

    Returns:
        A ``(metadata_list, delete_list)`` tuple: the constructed
        MetadataValue objects (not saved here) and the MetadataProperty
        each of them was created for.
    """
    c = self.source

    # Feature name -> list of XMP targets.  A target is either
    # (prefix, field) or (prefix, field, struct_prefix, struct_field).
    xmp_metadata_commons = {'size': [('notreDAM', 'FileSize')]}
    xmp_metadata_audio = {
        'channels': [('xmpDM', 'audioChannelType')],
        'sample_rate': [('xmpDM', 'audioSampleRate')],
        'duration': [('notreDAM', 'Duration')],
    }
    xmp_metadata_video = {
        'height': [('xmpDM', 'videoFrameSize', 'stDim', 'h')],
        'width': [('xmpDM', 'videoFrameSize', 'stDim', 'w')],
        'r_frame_rate': [('xmpDM', 'videoFrameRate')],
        'bit_rate': [('xmpDM', 'fileDataRate')],
        'duration': [('notreDAM', 'Duration')],
    }
    xmp_metadata_image = {
        'height': [('tiff', 'ImageLength')],
        'width': [('tiff', 'ImageWidth')],
    }
    xmp_metadata_doc = {
        'pages': [('notreDAM', 'NPages')],
        'Copyright': [('dc', 'rights')],
    }
    xmp_metadata_image.update(xmp_metadata_commons)
    xmp_metadata_audio.update(xmp_metadata_commons)
    xmp_metadata_doc.update(xmp_metadata_commons)
    # Video inherits the audio entries (which already include the commons).
    xmp_metadata_video.update(xmp_metadata_audio)
    xmp_metadata = {
        'image': xmp_metadata_image,
        'video': xmp_metadata_video,
        'audio': xmp_metadata_audio,
        'doc': xmp_metadata_doc,
    }

    metadata_list = []
    delete_list = []
    media_type = c.media_type.name
    user = self.item.uploaded_by()
    metadata_default_language = get_metadata_default_language(user)

    # An unknown media type simply has no XMP targets; the component
    # attributes below are still updated in that case.
    targets_by_feature = xmp_metadata.get(media_type, {})

    for feature, value in features.items():
        if value == '' or value == '0':
            continue

        # NOTE(review): the component size is taken from 'file_size' while
        # the XMP table is keyed on 'size' -- confirm both names are intended.
        if feature == 'file_size':
            c.size = value
        if feature == 'height':
            c.height = value
        elif feature == 'width':
            c.width = value

        xmp_names = targets_by_feature.get(feature)
        if xmp_names is None:
            continue

        for m in xmp_names:
            try:
                ms = MetadataProperty.objects.get(namespace__prefix=m[0], field_name=m[1])
            except Exception:
                # Was a bare ``except:``, which also swallowed
                # KeyboardInterrupt and SystemExit.
                log.debug('inside readfeatures, unknown metadata %s:%s ' % (m[0], m[1]))
                continue

            if not (ms.is_variant or c.variant.name == 'original'):
                continue

            if len(m) == 4:
                property_xpath = "%s:%s[1]/%s:%s" % (m[0], m[1], m[2], m[3])
            else:
                property_xpath = ''

            try:
                if ms.type == 'lang':
                    x = MetadataValue(schema=ms, object_id=c.pk, content_type=ctype,
                                      value=value, language=metadata_default_language,
                                      xpath=property_xpath)
                else:
                    x = MetadataValue(schema=ms, object_id=c.pk, content_type=ctype,
                                      value=value, xpath=property_xpath)
                metadata_list.append(x)
                delete_list.append(ms)
            except Exception:
                # Best effort: a value that cannot be built is logged and
                # skipped (narrowed from a bare ``except:``).
                log.debug('inside readfeatures, could not get %s' % ms)
                continue

    c.save()
    return (metadata_list, delete_list)
if self.get_cmdline is None: raise Exception('Analyzer is an abstract base class: instantiate a derived class') self._fc = Storage() self.workspace = workspace self.item, self.source = get_source_rendition(item_id, source_variant_name, workspace) def handle_result(self, result, *args): #log.debug('= handle_result %s' % str(result)[:128]) try: return self.parse_stdout(result['data'], *args) except Exception, e: log.error('Error in %s: %s %s' % (self.__class__.__name__, type(e), str(e))) raise def execute(self, **params): # get basic data (avoid creating stuff in DB) try: self.get_cmdline(**params) args = splitstring(self.cmdline) except Exception, e: log.error('Error in %s: %s %s' % (self.__class__.__name__, type(e), str(e))) raise else: if self.fake: log.debug('######### Command line:\n%s' % str(args)) else: result = generic_cmd.call.delay(self.remote_exe, args, self.env).get() return self.handle_result(result)